Commit 9295355985
Changed files (4)
test
behavior
src/codegen/llvm.zig
@@ -8424,28 +8424,45 @@ pub const FuncGen = struct {
const dest_slice = try self.resolveInst(bin_op.lhs);
const ptr_ty = self.air.typeOf(bin_op.lhs);
const elem_ty = self.air.typeOf(bin_op.rhs);
- const target = self.dg.module.getTarget();
- const val_is_undef = if (self.air.value(bin_op.rhs)) |val| val.isUndefDeep() else false;
+ const module = self.dg.module;
+ const target = module.getTarget();
const dest_ptr_align = ptr_ty.ptrAlignment(target);
const u8_llvm_ty = self.context.intType(8);
const dest_ptr = self.sliceOrArrayPtr(dest_slice, ptr_ty);
const is_volatile = ptr_ty.isVolatilePtr();
- if (val_is_undef) {
- // Even if safety is disabled, we still emit a memset to undefined since it conveys
- // extra information to LLVM. However, safety makes the difference between using
- // 0xaa or actual undefined for the fill byte.
- const fill_byte = if (safety)
- u8_llvm_ty.constInt(0xaa, .False)
- else
- u8_llvm_ty.getUndef();
- const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
- _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+ if (self.air.value(bin_op.rhs)) |elem_val| {
+ if (elem_val.isUndefDeep()) {
+ // Even if safety is disabled, we still emit a memset to undefined since it conveys
+ // extra information to LLVM. However, safety makes the difference between using
+ // 0xaa or actual undefined for the fill byte.
+ const fill_byte = if (safety)
+ u8_llvm_ty.constInt(0xaa, .False)
+ else
+ u8_llvm_ty.getUndef();
+ const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+ _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
- if (safety and self.dg.module.comp.bin_file.options.valgrind) {
- self.valgrindMarkUndef(dest_ptr, len);
+ if (safety and module.comp.bin_file.options.valgrind) {
+ self.valgrindMarkUndef(dest_ptr, len);
+ }
+ return null;
+ }
+
+ // Test if the element value is compile-time known to be a
+ // repeating byte pattern, for example, `@as(u64, 0)` has a
+ // repeating byte pattern of 0 bytes. In such a case, the memset
+ // intrinsic can be used.
+ var value_buffer: Value.Payload.U64 = undefined;
+ if (try elem_val.hasRepeatedByteRepr(elem_ty, module, &value_buffer)) |byte_val| {
+ const fill_byte = try self.resolveValue(.{
+ .ty = Type.u8,
+ .val = byte_val,
+ });
+ const len = self.sliceOrArrayLenInBytes(dest_slice, ptr_ty);
+ _ = self.builder.buildMemSet(dest_ptr, fill_byte, len, dest_ptr_align, is_volatile);
+ return null;
}
- return null;
}
const value = try self.resolveInst(bin_op.rhs);
src/Sema.zig
@@ -26953,9 +26953,11 @@ fn storePtrVal(
defer sema.gpa.free(buffer);
reinterpret.val_ptr.*.writeToMemory(mut_kit.ty, sema.mod, buffer) catch |err| switch (err) {
error.ReinterpretDeclRef => unreachable,
+ error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
};
operand_val.writeToMemory(operand_ty, sema.mod, buffer[reinterpret.byte_offset..]) catch |err| switch (err) {
error.ReinterpretDeclRef => unreachable,
+ error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
};
const arena = mut_kit.beginArena(sema.mod);
@@ -27905,6 +27907,7 @@ fn bitCastVal(
defer sema.gpa.free(buffer);
val.writeToMemory(old_ty, sema.mod, buffer) catch |err| switch (err) {
error.ReinterpretDeclRef => return null,
+ error.IllDefinedMemoryLayout => unreachable, // Sema was supposed to emit a compile error already
};
return try Value.readFromMemory(new_ty, sema.mod, buffer[buffer_offset..], sema.arena);
}
src/value.zig
@@ -1278,7 +1278,10 @@ pub const Value = extern union {
///
/// Asserts that buffer.len >= ty.abiSize(). The buffer is allowed to extend past
/// the end of the value in memory.
- pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{ReinterpretDeclRef}!void {
+ pub fn writeToMemory(val: Value, ty: Type, mod: *Module, buffer: []u8) error{
+ ReinterpretDeclRef,
+ IllDefinedMemoryLayout,
+ }!void {
const target = mod.getTarget();
const endian = target.cpu.arch.endian();
if (val.isUndef()) {
@@ -1345,7 +1348,7 @@ pub const Value = extern union {
return writeToPackedMemory(val, ty, mod, buffer[0..byte_count], 0);
},
.Struct => switch (ty.containerLayout()) {
- .Auto => unreachable, // Sema is supposed to have emitted a compile error already
+ .Auto => return error.IllDefinedMemoryLayout,
.Extern => {
const fields = ty.structFields().values();
const field_vals = val.castTag(.aggregate).?.data;
@@ -1366,7 +1369,7 @@ pub const Value = extern union {
std.mem.writeInt(Int, buffer[0..@sizeOf(Int)], @intCast(Int, int), endian);
},
.Union => switch (ty.containerLayout()) {
- .Auto => unreachable,
+ .Auto => return error.IllDefinedMemoryLayout,
.Extern => @panic("TODO implement writeToMemory for extern unions"),
.Packed => {
const byte_count = (@intCast(usize, ty.bitSize(target)) + 7) / 8;
@@ -5381,6 +5384,35 @@ pub const Value = extern union {
}
}
+ /// If the value is represented in-memory as a series of bytes that all
+ /// have the same value, return that byte value, otherwise null.
+ ///
+ /// The returned `Value` is backed by `value_buffer`, so the caller must keep
+ /// `value_buffer` alive for as long as the result is in use.
+ /// Also returns null when `writeToMemory` reports `ReinterpretDeclRef` or
+ /// `IllDefinedMemoryLayout`, or when the ABI size of `ty` does not fit in
+ /// the host's `usize`. Failure to allocate the temporary byte buffer is
+ /// propagated to the caller.
+ /// Asserts that the ABI size of `ty` is at least 1.
+ pub fn hasRepeatedByteRepr(val: Value, ty: Type, mod: *Module, value_buffer: *Payload.U64) !?Value {
+ const target = mod.getTarget();
+ // Convert the ABI size to `usize` with a checked cast before using it as
+ // an allocation length, so the code still compiles (and never truncates)
+ // on hosts whose `usize` is narrower than the size type. A value too
+ // large to materialize is simply reported as "no repeated byte".
+ // NOTE(review): assumes `abiSize` returns a 64-bit integer — confirm.
+ const abi_size = std.math.cast(usize, ty.abiSize(target)) orelse return null;
+ assert(abi_size >= 1);
+ const byte_buffer = try mod.gpa.alloc(u8, abi_size);
+ defer mod.gpa.free(byte_buffer);
+
+ writeToMemory(val, ty, mod, byte_buffer) catch |err| switch (err) {
+ error.ReinterpretDeclRef => return null,
+ // TODO: The writeToMemory function was originally created for the purpose
+ // of comptime pointer casting. However, it is now additionally being used
+ // for checking the actual memory layout that will be generated by machine
+ // code late in compilation. So, this error handling is too aggressive and
+ // causes some false negatives, causing less-than-ideal code generation.
+ error.IllDefinedMemoryLayout => return null,
+ };
+ // The representation is "repeated" only if every byte after the first
+ // equals the first one (trivially true for a single-byte value).
+ const first_byte = byte_buffer[0];
+ if (!std.mem.allEqual(u8, byte_buffer[1..], first_byte)) return null;
+ // Store the byte as a u64 integer payload in the caller-provided buffer
+ // and hand back a `Value` that points at it.
+ value_buffer.* = .{
+ .base = .{ .tag = .int_u64 },
+ .data = first_byte,
+ };
+ return initPayload(&value_buffer.base);
+ }
+
/// This type is not copyable since it may contain pointers to its inner data.
pub const Payload = struct {
tag: Tag,
test/behavior/memset.zig
@@ -94,7 +94,7 @@ test "memset with 1-byte array element" {
try expect(buf[4][0]);
}
-test "memset with large array element" {
+test "memset with large array element, runtime known" {
const A = [128]u64;
var buf: [5]A = undefined;
var runtime_known_element = [_]u64{0} ** 128;
@@ -106,6 +106,18 @@ test "memset with large array element" {
for (buf[4]) |elem| try expect(elem == 0);
}
+test "memset with large array element, comptime known" {
+ // Each destination element is itself a 128-word array.
+ const Chunk = [128]u64;
+ var chunks: [5]Chunk = undefined;
+ // Declared `const`, in contrast to the `var` element used by the
+ // runtime-known variant of this test.
+ const zero_chunk: Chunk = [_]u64{0} ** 128;
+ @memset(&chunks, zero_chunk);
+ // Every word of every destination chunk must now be zero.
+ for (chunks[0]) |word| try expect(word == 0);
+ for (chunks[1]) |word| try expect(word == 0);
+ for (chunks[2]) |word| try expect(word == 0);
+ for (chunks[3]) |word| try expect(word == 0);
+ for (chunks[4]) |word| try expect(word == 0);
+}
+
test "memcpy and memset intrinsics" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;