master
1const std = @import("std");
2const builtin = @import("builtin");
3const common = @import("common.zig");
4
5pub const panic = common.panic;
6
7comptime {
8 @export(&__clzsi2, .{ .name = "__clzsi2", .linkage = common.linkage, .visibility = common.visibility });
9 @export(&__clzdi2, .{ .name = "__clzdi2", .linkage = common.linkage, .visibility = common.visibility });
10 @export(&__clzti2, .{ .name = "__clzti2", .linkage = common.linkage, .visibility = common.visibility });
11 @export(&__ctzsi2, .{ .name = "__ctzsi2", .linkage = common.linkage, .visibility = common.visibility });
12 @export(&__ctzdi2, .{ .name = "__ctzdi2", .linkage = common.linkage, .visibility = common.visibility });
13 @export(&__ctzti2, .{ .name = "__ctzti2", .linkage = common.linkage, .visibility = common.visibility });
14 @export(&__ffssi2, .{ .name = "__ffssi2", .linkage = common.linkage, .visibility = common.visibility });
15 @export(&__ffsdi2, .{ .name = "__ffsdi2", .linkage = common.linkage, .visibility = common.visibility });
16 @export(&__ffsti2, .{ .name = "__ffsti2", .linkage = common.linkage, .visibility = common.visibility });
17}
18
19// clz - count leading zeroes
20// - clzXi2 for unoptimized little and big endian
21// - __clzsi2_thumb1: assume a != 0
22// - __clzsi2_arm32: assume a != 0
23
24// ctz - count trailing zeroes
25// - ctzXi2 for unoptimized little and big endian
26
27// ffs - find first set
28// * ffs = (a == 0) => 0, (a != 0) => ctz + 1
29// * dont pay for `if (x == 0) return shift;` inside ctz
30// - ffsXi2 for unoptimized little and big endian
31
32inline fn clzXi2(comptime T: type, a: T) i32 {
33 var x = switch (@bitSizeOf(T)) {
34 32 => @as(u32, @bitCast(a)),
35 64 => @as(u64, @bitCast(a)),
36 128 => @as(u128, @bitCast(a)),
37 else => unreachable,
38 };
39 var n: T = @bitSizeOf(T);
40 // Count first bit set using binary search, from Hacker's Delight
41 var y: @TypeOf(x) = 0;
42 comptime var shift: u8 = @bitSizeOf(T);
43 inline while (shift > 0) {
44 shift = shift >> 1;
45 y = x >> shift;
46 if (y != 0) {
47 n = n - shift;
48 x = y;
49 }
50 }
51 return @intCast(n - @as(T, @bitCast(x)));
52}
53
54fn __clzsi2_thumb1() callconv(.naked) void {
55 @setRuntimeSafety(false);
56
57 // Similar to the generic version with the last two rounds replaced by a LUT
58 asm volatile (
59 \\ movs r1, #32
60 \\ lsrs r2, r0, #16
61 \\ beq 1f
62 \\ subs r1, #16
63 \\ movs r0, r2
64 \\ 1:
65 \\ lsrs r2, r0, #8
66 \\ beq 1f
67 \\ subs r1, #8
68 \\ movs r0, r2
69 \\ 1:
70 \\ lsrs r2, r0, #4
71 \\ beq 1f
72 \\ subs r1, #4
73 \\ movs r0, r2
74 \\ 1:
75 \\ adr r3, .lut
76 \\ ldrb r0, [r3, r0]
77 \\ subs r0, r1, r0
78 \\ bx lr
79 \\ .p2align 2
80 \\ // Number of bits set in the 0-15 range
81 \\ .lut:
82 \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
83 );
84
85 unreachable;
86}
87
88fn __clzsi2_arm32() callconv(.naked) void {
89 @setRuntimeSafety(false);
90
91 asm volatile (
92 \\ // Assumption: n != 0
93 \\ // r0: n
94 \\ // r1: count of leading zeros in n + 1
95 \\ // r2: scratch register for shifted r0
96 \\ mov r1, #1
97 \\
98 \\ // Basic block:
99 \\ // if ((r0 >> SHIFT) == 0)
100 \\ // r1 += SHIFT;
101 \\ // else
102 \\ // r0 >>= SHIFT;
103 \\ // for descending powers of two as SHIFT.
104 \\ lsrs r2, r0, #16
105 \\ movne r0, r2
106 \\ addeq r1, #16
107 \\
108 \\ lsrs r2, r0, #8
109 \\ movne r0, r2
110 \\ addeq r1, #8
111 \\
112 \\ lsrs r2, r0, #4
113 \\ movne r0, r2
114 \\ addeq r1, #4
115 \\
116 \\ lsrs r2, r0, #2
117 \\ movne r0, r2
118 \\ addeq r1, #2
119 \\
120 \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
121 \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
122 \\ //
123 \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
124 \\ // ---+----------------+----------------+------------+--------------
125 \\ // 1 | 1 | 0 | 0 | 1
126 \\ // 2 | 0 | 1 | -1 | 0
127 \\ // 3 | 0 | 1 | -1 | 0
128 \\ //
129 \\ // The r1's initial value of 1 compensates for the 1 here.
130 \\ sub r0, r1, r0, lsr #1
131 \\ bx lr
132 );
133
134 unreachable;
135}
136
137fn clzsi2_generic(a: i32) callconv(.c) i32 {
138 return clzXi2(i32, a);
139}
140
141pub const __clzsi2 = switch (builtin.cpu.arch) {
142 .arm, .armeb, .thumb, .thumbeb => impl: {
143 const use_thumb1 =
144 (builtin.cpu.arch.isThumb() or builtin.cpu.has(.arm, .noarm)) and !builtin.cpu.has(.arm, .thumb2);
145
146 if (use_thumb1) {
147 break :impl __clzsi2_thumb1;
148 }
149 // From here on we're either targeting Thumb2 or ARM.
150 else if (!builtin.cpu.arch.isThumb()) {
151 break :impl __clzsi2_arm32;
152 }
153 // Use the generic implementation otherwise.
154 else break :impl clzsi2_generic;
155 },
156 else => clzsi2_generic,
157};
158
159pub fn __clzdi2(a: i64) callconv(.c) i32 {
160 return clzXi2(i64, a);
161}
162
163pub fn __clzti2(a: i128) callconv(.c) i32 {
164 return clzXi2(i128, a);
165}
166
167inline fn ctzXi2(comptime T: type, a: T) i32 {
168 var x = switch (@bitSizeOf(T)) {
169 32 => @as(u32, @bitCast(a)),
170 64 => @as(u64, @bitCast(a)),
171 128 => @as(u128, @bitCast(a)),
172 else => unreachable,
173 };
174 var n: T = 1;
175 // Number of trailing zeroes as binary search, from Hacker's Delight
176 var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
177 comptime var shift = @bitSizeOf(T);
178 if (x == 0) return shift;
179 inline while (shift > 1) {
180 shift = shift >> 1;
181 mask = mask >> shift;
182 if ((x & mask) == 0) {
183 n = n + shift;
184 x = x >> shift;
185 }
186 }
187 return @intCast(n - @as(T, @bitCast((x & 1))));
188}
189
190pub fn __ctzsi2(a: i32) callconv(.c) i32 {
191 return ctzXi2(i32, a);
192}
193
194pub fn __ctzdi2(a: i64) callconv(.c) i32 {
195 return ctzXi2(i64, a);
196}
197
198pub fn __ctzti2(a: i128) callconv(.c) i32 {
199 return ctzXi2(i128, a);
200}
201
202inline fn ffsXi2(comptime T: type, a: T) i32 {
203 var x: std.meta.Int(.unsigned, @typeInfo(T).int.bits) = @bitCast(a);
204 var n: T = 1;
205 // adapted from Number of trailing zeroes (see ctzXi2)
206 var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
207 comptime var shift = @bitSizeOf(T);
208 // In contrast to ctz return 0
209 if (x == 0) return 0;
210 inline while (shift > 1) {
211 shift = shift >> 1;
212 mask = mask >> shift;
213 if ((x & mask) == 0) {
214 n = n + shift;
215 x = x >> shift;
216 }
217 }
218 // return ctz + 1
219 return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + 1;
220}
221
222pub fn __ffssi2(a: i32) callconv(.c) i32 {
223 return ffsXi2(i32, a);
224}
225
226pub fn __ffsdi2(a: i64) callconv(.c) i32 {
227 return ffsXi2(i64, a);
228}
229
230pub fn __ffsti2(a: i128) callconv(.c) i32 {
231 return ffsXi2(i128, a);
232}
233
234test {
235 _ = @import("clzsi2_test.zig");
236 _ = @import("clzdi2_test.zig");
237 _ = @import("clzti2_test.zig");
238
239 _ = @import("ctzsi2_test.zig");
240 _ = @import("ctzdi2_test.zig");
241 _ = @import("ctzti2_test.zig");
242
243 _ = @import("ffssi2_test.zig");
244 _ = @import("ffsdi2_test.zig");
245 _ = @import("ffsti2_test.zig");
246}