Commit 4bf093f1a0

LemonBoy <thatlemon@gmail.com>
2021-05-04 18:45:52
compiler-rt: Better selection of __clzsi2 implementation
To be honest, all this detection logic is starting to become a real PITA; the ARM32 version could possibly be removed, as the generic version optimizes pretty well...
1 parent 389d117
Changed files (2)
lib
std
special
lib/std/special/compiler_rt/clzsi2.zig
@@ -26,6 +26,8 @@ fn __clzsi2_generic(a: i32) callconv(.C) i32 {
 }
 
 fn __clzsi2_thumb1() callconv(.Naked) void {
+    @setRuntimeSafety(false);
+
     // Similar to the generic version with the last two rounds replaced by a LUT
     asm volatile (
         \\ movs r1, #32
@@ -58,6 +60,8 @@ fn __clzsi2_thumb1() callconv(.Naked) void {
 }
 
 fn __clzsi2_arm32() callconv(.Naked) void {
+    @setRuntimeSafety(false);
+
     asm volatile (
         \\ // Assumption: n != 0
         \\ // r0: n
@@ -104,13 +108,22 @@ fn __clzsi2_arm32() callconv(.Naked) void {
     unreachable;
 }
 
-pub const __clzsi2 = switch (std.Target.current.cpu.arch) {
-    .arm, .armeb => if (std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm))
-        __clzsi2_thumb1
-    else
-        __clzsi2_arm32,
-    .thumb, .thumbeb => __clzsi2_thumb1,
-    else => __clzsi2_generic,
+pub const __clzsi2 = impl: {
+    switch (std.Target.current.cpu.arch) {
+        .arm, .armeb, .thumb, .thumbeb => {
+            const use_thumb1 =
+                (std.Target.current.cpu.arch.isThumb() or
+                std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm)) and
+                !std.Target.arm.featureSetHas(std.Target.current.cpu.features, .thumb2);
+
+            if (use_thumb1) break :impl __clzsi2_thumb1
+            // Thumb1 is ruled out, leaving ARM or Thumb2; a non-Thumb arch takes the ARM32 assembly.
+            else if (!std.Target.current.cpu.arch.isThumb()) break :impl __clzsi2_arm32
+            // A Thumb arch with the thumb2 feature falls back to the generic implementation.
+            else break :impl __clzsi2_generic;
+        },
+        else => break :impl __clzsi2_generic,
+    }
 };
 
 test "test clzsi2" {
lib/std/special/compiler_rt/clzsi2_test.zig
@@ -7,6 +7,8 @@ const clzsi2 = @import("clzsi2.zig");
 const testing = @import("std").testing;
 
 fn test__clzsi2(a: u32, expected: i32) void {
+    // XXX At high optimization levels this test may be horribly miscompiled if
+    // one of the naked implementations is selected.
     var nakedClzsi2 = clzsi2.__clzsi2;
     var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2);
     var x = @bitCast(i32, a);