master
  1const std = @import("std");
  2const builtin = @import("builtin");
  3const common = @import("common.zig");
  4
  5pub const panic = common.panic;
  6
  7comptime {
  8    @export(&__clzsi2, .{ .name = "__clzsi2", .linkage = common.linkage, .visibility = common.visibility });
  9    @export(&__clzdi2, .{ .name = "__clzdi2", .linkage = common.linkage, .visibility = common.visibility });
 10    @export(&__clzti2, .{ .name = "__clzti2", .linkage = common.linkage, .visibility = common.visibility });
 11    @export(&__ctzsi2, .{ .name = "__ctzsi2", .linkage = common.linkage, .visibility = common.visibility });
 12    @export(&__ctzdi2, .{ .name = "__ctzdi2", .linkage = common.linkage, .visibility = common.visibility });
 13    @export(&__ctzti2, .{ .name = "__ctzti2", .linkage = common.linkage, .visibility = common.visibility });
 14    @export(&__ffssi2, .{ .name = "__ffssi2", .linkage = common.linkage, .visibility = common.visibility });
 15    @export(&__ffsdi2, .{ .name = "__ffsdi2", .linkage = common.linkage, .visibility = common.visibility });
 16    @export(&__ffsti2, .{ .name = "__ffsti2", .linkage = common.linkage, .visibility = common.visibility });
 17}
 18
 19// clz - count leading zeroes
 20// - clzXi2 for unoptimized little and big endian
 21// - __clzsi2_thumb1: assume a != 0
 22// - __clzsi2_arm32: assume a != 0
 23
 24// ctz - count trailing zeroes
 25// - ctzXi2 for unoptimized little and big endian
 26
 27// ffs - find first set
 28// * ffs = (a == 0) => 0, (a != 0) => ctz + 1
 29// * dont pay for `if (x == 0) return shift;` inside ctz
 30// - ffsXi2 for unoptimized little and big endian
 31
 32inline fn clzXi2(comptime T: type, a: T) i32 {
 33    var x = switch (@bitSizeOf(T)) {
 34        32 => @as(u32, @bitCast(a)),
 35        64 => @as(u64, @bitCast(a)),
 36        128 => @as(u128, @bitCast(a)),
 37        else => unreachable,
 38    };
 39    var n: T = @bitSizeOf(T);
 40    // Count first bit set using binary search, from Hacker's Delight
 41    var y: @TypeOf(x) = 0;
 42    comptime var shift: u8 = @bitSizeOf(T);
 43    inline while (shift > 0) {
 44        shift = shift >> 1;
 45        y = x >> shift;
 46        if (y != 0) {
 47            n = n - shift;
 48            x = y;
 49        }
 50    }
 51    return @intCast(n - @as(T, @bitCast(x)));
 52}
 53
 54fn __clzsi2_thumb1() callconv(.naked) void {
 55    @setRuntimeSafety(false);
 56
 57    // Similar to the generic version with the last two rounds replaced by a LUT
 58    asm volatile (
 59        \\ movs r1, #32
 60        \\ lsrs r2, r0, #16
 61        \\ beq 1f
 62        \\ subs r1, #16
 63        \\ movs r0, r2
 64        \\ 1:
 65        \\ lsrs r2, r0, #8
 66        \\ beq 1f
 67        \\ subs r1, #8
 68        \\ movs r0, r2
 69        \\ 1:
 70        \\ lsrs r2, r0, #4
 71        \\ beq 1f
 72        \\ subs r1, #4
 73        \\ movs r0, r2
 74        \\ 1:
 75        \\ adr r3, .lut
 76        \\ ldrb r0, [r3, r0]
 77        \\ subs r0, r1, r0
 78        \\ bx lr
 79        \\ .p2align 2
 80        \\ // Number of bits set in the 0-15 range
 81        \\ .lut:
 82        \\ .byte 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4
 83    );
 84
 85    unreachable;
 86}
 87
 88fn __clzsi2_arm32() callconv(.naked) void {
 89    @setRuntimeSafety(false);
 90
 91    asm volatile (
 92        \\ // Assumption: n != 0
 93        \\ // r0: n
 94        \\ // r1: count of leading zeros in n + 1
 95        \\ // r2: scratch register for shifted r0
 96        \\ mov r1, #1
 97        \\
 98        \\ // Basic block:
 99        \\ // if ((r0 >> SHIFT) == 0)
100        \\ //   r1 += SHIFT;
101        \\ // else
102        \\ //   r0 >>= SHIFT;
103        \\ // for descending powers of two as SHIFT.
104        \\ lsrs r2, r0, #16
105        \\ movne r0, r2
106        \\ addeq r1, #16
107        \\
108        \\ lsrs r2, r0, #8
109        \\ movne r0, r2
110        \\ addeq r1, #8
111        \\
112        \\ lsrs r2, r0, #4
113        \\ movne r0, r2
114        \\ addeq r1, #4
115        \\
116        \\ lsrs r2, r0, #2
117        \\ movne r0, r2
118        \\ addeq r1, #2
119        \\
120        \\ // The basic block invariants at this point are (r0 >> 2) == 0 and
121        \\ // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
122        \\ //
123        \\ // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)f
124        \\ // ---+----------------+----------------+------------+--------------
125        \\ // 1  | 1              | 0              | 0          | 1
126        \\ // 2  | 0              | 1              | -1         | 0
127        \\ // 3  | 0              | 1              | -1         | 0
128        \\ //
129        \\ // The r1's initial value of 1 compensates for the 1 here.
130        \\ sub r0, r1, r0, lsr #1
131        \\ bx lr
132    );
133
134    unreachable;
135}
136
137fn clzsi2_generic(a: i32) callconv(.c) i32 {
138    return clzXi2(i32, a);
139}
140
141pub const __clzsi2 = switch (builtin.cpu.arch) {
142    .arm, .armeb, .thumb, .thumbeb => impl: {
143        const use_thumb1 =
144            (builtin.cpu.arch.isThumb() or builtin.cpu.has(.arm, .noarm)) and !builtin.cpu.has(.arm, .thumb2);
145
146        if (use_thumb1) {
147            break :impl __clzsi2_thumb1;
148        }
149        // From here on we're either targeting Thumb2 or ARM.
150        else if (!builtin.cpu.arch.isThumb()) {
151            break :impl __clzsi2_arm32;
152        }
153        // Use the generic implementation otherwise.
154        else break :impl clzsi2_generic;
155    },
156    else => clzsi2_generic,
157};
158
159pub fn __clzdi2(a: i64) callconv(.c) i32 {
160    return clzXi2(i64, a);
161}
162
163pub fn __clzti2(a: i128) callconv(.c) i32 {
164    return clzXi2(i128, a);
165}
166
167inline fn ctzXi2(comptime T: type, a: T) i32 {
168    var x = switch (@bitSizeOf(T)) {
169        32 => @as(u32, @bitCast(a)),
170        64 => @as(u64, @bitCast(a)),
171        128 => @as(u128, @bitCast(a)),
172        else => unreachable,
173    };
174    var n: T = 1;
175    // Number of trailing zeroes as binary search, from Hacker's Delight
176    var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
177    comptime var shift = @bitSizeOf(T);
178    if (x == 0) return shift;
179    inline while (shift > 1) {
180        shift = shift >> 1;
181        mask = mask >> shift;
182        if ((x & mask) == 0) {
183            n = n + shift;
184            x = x >> shift;
185        }
186    }
187    return @intCast(n - @as(T, @bitCast((x & 1))));
188}
189
190pub fn __ctzsi2(a: i32) callconv(.c) i32 {
191    return ctzXi2(i32, a);
192}
193
194pub fn __ctzdi2(a: i64) callconv(.c) i32 {
195    return ctzXi2(i64, a);
196}
197
198pub fn __ctzti2(a: i128) callconv(.c) i32 {
199    return ctzXi2(i128, a);
200}
201
202inline fn ffsXi2(comptime T: type, a: T) i32 {
203    var x: std.meta.Int(.unsigned, @typeInfo(T).int.bits) = @bitCast(a);
204    var n: T = 1;
205    // adapted from Number of trailing zeroes (see ctzXi2)
206    var mask: @TypeOf(x) = std.math.maxInt(@TypeOf(x));
207    comptime var shift = @bitSizeOf(T);
208    // In contrast to ctz return 0
209    if (x == 0) return 0;
210    inline while (shift > 1) {
211        shift = shift >> 1;
212        mask = mask >> shift;
213        if ((x & mask) == 0) {
214            n = n + shift;
215            x = x >> shift;
216        }
217    }
218    // return ctz + 1
219    return @as(i32, @intCast(n - @as(T, @bitCast((x & 1))))) + 1;
220}
221
222pub fn __ffssi2(a: i32) callconv(.c) i32 {
223    return ffsXi2(i32, a);
224}
225
226pub fn __ffsdi2(a: i64) callconv(.c) i32 {
227    return ffsXi2(i64, a);
228}
229
230pub fn __ffsti2(a: i128) callconv(.c) i32 {
231    return ffsXi2(i128, a);
232}
233
234test {
235    _ = @import("clzsi2_test.zig");
236    _ = @import("clzdi2_test.zig");
237    _ = @import("clzti2_test.zig");
238
239    _ = @import("ctzsi2_test.zig");
240    _ = @import("ctzdi2_test.zig");
241    _ = @import("ctzti2_test.zig");
242
243    _ = @import("ffssi2_test.zig");
244    _ = @import("ffsdi2_test.zig");
245    _ = @import("ffsti2_test.zig");
246}