Commit c619b85f67

Auguste Rame <auguste.rame@gmail.com>
2021-07-26 04:43:52
ctz + clz
1 parent ecca829
Changed files (4)
src/stage1/all_types.hpp
@@ -1907,9 +1907,11 @@ struct ZigLLVMFnKey {
     union {
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } ctz;
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } clz;
         struct {
             uint32_t bit_count;
src/stage1/analyze.cpp
@@ -7883,9 +7883,11 @@ bool type_id_eql(TypeId const *a, TypeId const *b) {
 uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey const *x) {
     switch (x->id) {
         case ZigLLVMFnIdCtz:
-            return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934;
+            return (uint32_t)(x->data.ctz.bit_count) * (uint32_t)810453934 +
+                   (uint32_t)(x->data.ctz.vector_len) * (((uint32_t)x->id << 5) + 1025);
         case ZigLLVMFnIdClz:
-            return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817;
+            return (uint32_t)(x->data.clz.bit_count) * (uint32_t)2428952817 +
+                   (uint32_t)(x->data.clz.vector_len) * (((uint32_t)x->id << 5) + 1025);
         case ZigLLVMFnIdPopCount:
             return (uint32_t)(x->data.pop_count.bit_count) * (uint32_t)101195049 +
                    (uint32_t)(x->data.pop_count.vector_len) * (((uint32_t)x->id << 5) + 1025);
src/stage1/ir.cpp
@@ -15945,54 +15945,154 @@ static Stage1AirInst *ir_analyze_instruction_optional_unwrap_ptr(IrAnalyze *ira,
 }
 
 static Stage1AirInst *ir_analyze_instruction_ctz(IrAnalyze *ira, Stage1ZirInstCtz *instruction) { // analyzes a ctz instruction whose operand is a scalar integer, a vector, or an array usable as a vector
+    Error err;
+    
     ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child); // integer type resolved from the instruction's type operand
     if (type_is_invalid(int_type))
         return ira->codegen->invalid_inst_gen;
 
-    Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
+    Stage1AirInst *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value->type))
+        return ira->codegen->invalid_inst_gen;
+
+    uint32_t vector_len = UINT32_MAX; // sentinel: means not a vector
+    if (uncasted_op->value->type->id == ZigTypeIdArray) { // an array is handled as a vector only when is_valid_vector_elem_type accepts its element type
+        bool can_be_vec_elem;
+        if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
+                        &can_be_vec_elem)))
+        {
+            return ira->codegen->invalid_inst_gen;
+        }
+        if (can_be_vec_elem) {
+            vector_len = uncasted_op->value->type->data.array.len;
+        }
+    } else if (uncasted_op->value->type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value->type->data.vector.len;
+    }
+
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type; // cast target: vector of int_type for vector operands, int_type otherwise
+
+    Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value->type))
         return ira->codegen->invalid_inst_gen;
 
     if (int_type->data.integral.bit_count == 0) // NOTE(review): this early return does not consult is_vector — confirm the result is correct for vector operands
         return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
 
+    ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count); // per-element result type
+
     if (instr_is_comptime(op)) {
         ZigValue *val = ir_resolve_const(ira, op, UndefOk);
         if (val == nullptr)
             return ira->codegen->invalid_inst_gen;
         if (val->special == ConstValSpecialUndef) // NOTE(review): the undef result below is typed entry_num_lit_int even when is_vector — confirm
             return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
-        size_t result_usize = bigint_ctz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
-        return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
+        
+        if (is_vector) { // comptime vector operand: compute the result element by element
+            ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
+            Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
+            expand_undef_array(ira->codegen, val);
+            result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len); // one ZigValue per vector element
+            for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
+                ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_inst_gen;
+                }
+                ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
+                result_elem_val->type = smallest_type;
+                result_elem_val->special = op_elem_val->special; // result element copies the operand element's special state
+                if (op_elem_val->special == ConstValSpecialUndef) // undef operand element: leave the result element undef and skip the count
+                    continue;
+                size_t value = bigint_ctz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
+                bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
+            }
+            return result;
+        } else { // comptime scalar operand: single bigint_ctz call, same as before this change
+            size_t result_usize = bigint_ctz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
+            return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
+        }
     }
 
-    ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
+    ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type; // runtime path: vector of smallest_type for vector operands, smallest_type otherwise
     return ir_build_ctz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
 }
 
 static Stage1AirInst *ir_analyze_instruction_clz(IrAnalyze *ira, Stage1ZirInstClz *instruction) { // analyzes a clz instruction whose operand is a scalar integer, a vector, or an array usable as a vector
+    Error err;
+    
     ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child); // integer type resolved from the instruction's type operand
     if (type_is_invalid(int_type))
         return ira->codegen->invalid_inst_gen;
 
-    Stage1AirInst *op = ir_implicit_cast(ira, instruction->op->child, int_type);
+    Stage1AirInst *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value->type))
+        return ira->codegen->invalid_inst_gen;
+
+    uint32_t vector_len = UINT32_MAX; // sentinel: means not a vector
+    if (uncasted_op->value->type->id == ZigTypeIdArray) { // an array is handled as a vector only when is_valid_vector_elem_type accepts its element type
+        bool can_be_vec_elem;
+        if ((err = is_valid_vector_elem_type(ira->codegen, uncasted_op->value->type->data.array.child_type,
+                        &can_be_vec_elem)))
+        {
+            return ira->codegen->invalid_inst_gen;
+        }
+        if (can_be_vec_elem) {
+            vector_len = uncasted_op->value->type->data.array.len;
+        }
+    } else if (uncasted_op->value->type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value->type->data.vector.len;
+    }
+
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type; // cast target: vector of int_type for vector operands, int_type otherwise
+
+    Stage1AirInst *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value->type))
         return ira->codegen->invalid_inst_gen;
 
     if (int_type->data.integral.bit_count == 0) // NOTE(review): this early return does not consult is_vector — confirm the result is correct for vector operands
         return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, 0);
 
+    ZigType *smallest_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count); // per-element result type
+
     if (instr_is_comptime(op)) {
         ZigValue *val = ir_resolve_const(ira, op, UndefOk);
         if (val == nullptr)
             return ira->codegen->invalid_inst_gen;
         if (val->special == ConstValSpecialUndef) // NOTE(review): the undef result below is typed entry_num_lit_int even when is_vector — confirm
             return ir_const_undef(ira, instruction->base.scope, instruction->base.source_node, ira->codegen->builtin_types.entry_num_lit_int);
-        size_t result_usize = bigint_clz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
-        return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
+        
+        if (is_vector) { // comptime vector operand: compute the result element by element
+            ZigType *smallest_vec_type = get_vector_type(ira->codegen, vector_len, smallest_type);
+            Stage1AirInst *result = ir_const(ira, instruction->base.scope, instruction->base.source_node, smallest_vec_type);
+            expand_undef_array(ira->codegen, val);
+            result->value->data.x_array.data.s_none.elements = ira->codegen->pass1_arena->allocate<ZigValue>(smallest_vec_type->data.vector.len); // one ZigValue per vector element
+            for (unsigned i = 0; i < smallest_vec_type->data.vector.len; i += 1) {
+                ZigValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_inst_gen;
+                }
+                ZigValue *result_elem_val = &result->value->data.x_array.data.s_none.elements[i];
+                result_elem_val->type = smallest_type;
+                result_elem_val->special = op_elem_val->special; // result element copies the operand element's special state
+                if (op_elem_val->special == ConstValSpecialUndef) // undef operand element: leave the result element undef and skip the count
+                    continue;
+                size_t value = bigint_clz(&op_elem_val->data.x_bigint, int_type->data.integral.bit_count);
+                bigint_init_unsigned(&result->value->data.x_array.data.s_none.elements[i].data.x_bigint, value);
+            }
+            return result;
+        } else { // comptime scalar operand: single bigint_clz call, same as before this change
+            size_t result_usize = bigint_clz(&op->value->data.x_bigint, int_type->data.integral.bit_count);
+            return ir_const_unsigned(ira, instruction->base.scope, instruction->base.source_node, result_usize);
+        }
     }
 
-    ZigType *return_type = get_smallest_unsigned_int_type(ira->codegen, int_type->data.integral.bit_count);
+    ZigType *return_type = is_vector ? get_vector_type(ira->codegen, vector_len, smallest_type) : smallest_type; // runtime path: vector of smallest_type for vector operands, smallest_type otherwise
     return ir_build_clz_gen(ira, instruction->base.scope, instruction->base.source_node, return_type, op);
 }
 
test/behavior/math.zig
@@ -123,16 +123,27 @@ test "@clz" {
 }
 
 fn testClz() !void { // scalar @clz checks; the added lines invoke the builtin directly
-    try expect(clz(u8, 0b10001010) == 0);
-    try expect(clz(u8, 0b00001010) == 4);
-    try expect(clz(u8, 0b00011010) == 3);
-    try expect(clz(u8, 0b00000000) == 8);
-    try expect(clz(u128, 0xffffffffffffffff) == 64);
-    try expect(clz(u128, 0x10000000000000000) == 63);
+    try expect(@clz(u8, 0b10001010) == 0);
+    try expect(@clz(u8, 0b00001010) == 4);
+    try expect(@clz(u8, 0b00011010) == 3);
+    try expect(@clz(u8, 0b00000000) == 8); // a zero operand yields the full bit width
+    try expect(@clz(u128, 0xffffffffffffffff) == 64); // only the low 64 of 128 bits are set
+    try expect(@clz(u128, 0x10000000000000000) == 63); // only bit 64 is set
 }
 
-fn clz(comptime T: type, x: T) usize {
-    return @clz(T, x);
+test "@clz vectors" {
+    try testClzVectors();
+    comptime try testClzVectors(); // same checks again, evaluated at comptime
+}
+
+fn testClzVectors() !void { // @clz on 64-element splat vectors, mirroring the scalar cases in testClz
+    @setEvalBranchQuota(10_000); // presumably needed for the comptime run over 64-element vectors — confirm
+    try expectEqual(@clz(u8, @splat(64, @as(u8, 0b10001010))), @splat(64, @as(u4, 0))); // u8 elements are compared against u4 elements
+    try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00001010))), @splat(64, @as(u4, 4)));
+    try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00011010))), @splat(64, @as(u4, 3)));
+    try expectEqual(@clz(u8, @splat(64, @as(u8, 0b00000000))), @splat(64, @as(u4, 8)));
+    try expectEqual(@clz(u128, @splat(64, @as(u128, 0xffffffffffffffff))), @splat(64, @as(u8, 64))); // u128 elements are compared against u8 elements
+    try expectEqual(@clz(u128, @splat(64, @as(u128, 0x10000000000000000))), @splat(64, @as(u8, 63)));
 }
 
 test "@ctz" {
@@ -141,14 +152,23 @@ test "@ctz" {
 }
 
 fn testCtz() !void { // scalar @ctz checks; the added lines invoke the builtin directly
-    try expect(ctz(u8, 0b10100000) == 5);
-    try expect(ctz(u8, 0b10001010) == 1);
-    try expect(ctz(u8, 0b00000000) == 8);
-    try expect(ctz(u16, 0b00000000) == 16);
+    try expect(@ctz(u8, 0b10100000) == 5);
+    try expect(@ctz(u8, 0b10001010) == 1);
+    try expect(@ctz(u8, 0b00000000) == 8); // a zero operand yields the full bit width
+    try expect(@ctz(u16, 0b00000000) == 16);
+}
+
+test "@ctz vectors" {
+    try testCtzVectors();
+    comptime try testCtzVectors(); // same checks again, evaluated at comptime
 }
 
-fn ctz(comptime T: type, x: T) usize {
-    return @ctz(T, x);
+fn testCtzVectors() !void { // @ctz on 64-element splat vectors, mirroring the scalar cases in testCtz
+    @setEvalBranchQuota(10_000);
+    try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b10100000))), @splat(64, @as(u4, 5))); // u8 elements are compared against u4 elements
+    try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b10001010))), @splat(64, @as(u4, 1)));
+    try expectEqual(@ctz(u8, @splat(64, @as(u8, 0b00000000))), @splat(64, @as(u4, 8)));
+    try expectEqual(@ctz(u16, @splat(64, @as(u16, 0b00000000))), @splat(64, @as(u5, 16))); // u16 elements are compared against u5 elements
 }
 
 test "assignment operators" {