Commit b945d3eb90

Jacob Young <jacobly0@users.noreply.github.com>
2022-11-01 01:18:15
cbe: improve support for non-native float types
* Fix _start on aarch64.
* Add fallbacks when a float type is unsupported.

Fixes #13357
1 parent 0bbb000
Changed files (5)
lib
src
codegen
test
lib/include/zig.h
@@ -20,7 +20,7 @@
 
 #if __STDC_VERSION__ >= 201112L
 #define zig_threadlocal thread_local
-#elif __GNUC__
+#elif defined(__GNUC__)
 #define zig_threadlocal __thread
 #elif _MSC_VER
 #define zig_threadlocal __declspec(thread)
@@ -28,7 +28,7 @@
 #define zig_threadlocal zig_threadlocal_unavailable
 #endif
 
-#if zig_has_attribute(naked)
+#if zig_has_attribute(naked) || defined(__GNUC__)
 #define zig_naked __attribute__((naked))
 #elif defined(_MSC_VER)
 #define zig_naked __declspec(naked)
@@ -128,7 +128,7 @@
 #define   zig_atomic_store(obj, arg, order) atomic_store_explicit     (obj, arg, order)
 #define    zig_atomic_load(obj,      order) atomic_load_explicit      (obj,      order)
 #define zig_fence(order) atomic_thread_fence(order)
-#elif __GNUC__
+#elif defined(__GNUC__)
 #define memory_order_relaxed __ATOMIC_RELAXED
 #define memory_order_consume __ATOMIC_CONSUME
 #define memory_order_acquire __ATOMIC_ACQUIRE
@@ -176,7 +176,7 @@
 
 #if __STDC_VERSION__ >= 201112L
 #define zig_noreturn _Noreturn void
-#elif zig_has_attribute(noreturn)
+#elif zig_has_attribute(noreturn) || defined(__GNUC__)
 #define zig_noreturn __attribute__((noreturn)) void
 #elif _MSC_VER
 #define zig_noreturn __declspec(noreturn) void
@@ -184,6 +184,9 @@
 #define zig_noreturn void
 #endif
 
+#define zig_concat(lhs, rhs) lhs##rhs
+#define zig_expand_concat(lhs, rhs) zig_concat(lhs, rhs)
+
 #define zig_bitSizeOf(T) (CHAR_BIT * sizeof(T))
 
 typedef void zig_void;
@@ -250,110 +253,17 @@ typedef  int64_t zig_i64;
 #define zig_minInt_i64  INT64_MIN
 #define zig_maxInt_i64  INT64_MAX
 
-#define zig_builtin_f16(name) __##name##h
-#define zig_builtin_constant_f16(name) zig_suffix_f16(__builtin_##name)
-#if FLT_MANT_DIG == 11
-typedef float zig_f16;
-#define zig_suffix_f16(x) x##f
-#elif DBL_MANT_DIG == 11
-typedef double zig_f16;
-#define zig_suffix_f16(x) x
-#elif LDBL_MANT_DIG == 11
-typedef long double zig_f16;
-#define zig_suffix_f16(x) x##l
-#elif FLT16_MANT_DIG == 11
-typedef _Float16 zig_f16;
-#define zig_suffix_f16(x) x##f16
-#elif defined(__SIZEOF_FP16__)
-typedef __fp16 zig_f16;
-#define zig_suffix_f16(x) x##f16
-#endif
-
-#define zig_builtin_f32(name) name##f
-#define zig_builtin_constant_f32(name) zig_suffix_f32(__builtin_##name)
-#if FLT_MANT_DIG == 24
-typedef float zig_f32;
-#define zig_suffix_f32(x) x##f
-#elif DBL_MANT_DIG == 24
-typedef double zig_f32;
-#define zig_suffix_f32(x) x
-#elif LDBL_MANT_DIG == 24
-typedef long double zig_f32;
-#define zig_suffix_f32(x) x##l
-#elif FLT32_MANT_DIG == 24
-typedef _Float32 zig_f32;
-#define zig_suffix_f32(x) x##f32
-#endif
-
-#define zig_builtin_f64(name) name
-#define zig_builtin_constant_f64(name) zig_suffix_f64(__builtin_##name)
-#if FLT_MANT_DIG == 53
-typedef float zig_f64;
-#define zig_suffix_f64(x) x##f
-#elif DBL_MANT_DIG == 53
-typedef double zig_f64;
-#define zig_suffix_f64(x) x
-#elif LDBL_MANT_DIG == 53
-typedef long double zig_f64;
-#define zig_suffix_f64(x) x##l
-#elif FLT64_MANT_DIG == 53
-typedef _Float64 zig_f64;
-#define zig_suffix_f64(x) x##f64
-#elif FLT32X_MANT_DIG == 53
-typedef _Float32x zig_f64;
-#define zig_suffix_f64(x) x##f32x
-#endif
-
-#define zig_builtin_f80(name) __##name##x
-#define zig_builtin_constant_f80(name) zig_suffix_f80(__builtin_##name)
-#if FLT_MANT_DIG == 64
-typedef float zig_f80;
-#define zig_suffix_f80(x) x##f
-#elif DBL_MANT_DIG == 64
-typedef double zig_f80;
-#define zig_suffix_f80(x) x
-#elif LDBL_MANT_DIG == 64
-typedef long double zig_f80;
-#define zig_suffix_f80(x) x##l
-#elif FLT80_MANT_DIG == 64
-typedef _Float80 zig_f80;
-#define zig_suffix_f80(x) x##f80
-#elif FLT64X_MANT_DIG == 64
-typedef _Float64x zig_f80;
-#define zig_suffix_f80(x) x##f64x
-#elif defined(__SIZEOF_FLOAT80__)
-typedef __float80 zig_f80;
-#define zig_suffix_f80(x) x##l
-#endif
-
-#define zig_builtin_f128(name) name##q
-#define zig_builtin_constant_f128(name) zig_suffix_f80(__builtin_##name)
-#if FLT_MANT_DIG == 113
-typedef float zig_f128;
-#define zig_suffix_f128(x) x##f
-#elif DBL_MANT_DIG == 113
-typedef double zig_f128;
-#define zig_suffix_f128(x) x
-#elif LDBL_MANT_DIG == 113
-typedef long double zig_f128;
-#define zig_suffix_f128(x) x##l
-#elif FLT128_MANT_DIG == 113
-typedef _Float128 zig_f128;
-#define zig_suffix_f128(x) x##f128
-#elif FLT64X_MANT_DIG == 113
-typedef _Float64x zig_f128;
-#define zig_suffix_f128(x) x##f64x
-#elif defined(__SIZEOF_FLOAT128__)
-typedef __float128 zig_f128;
-#define zig_suffix_f128(x) x##q
-#undef zig_builtin_constant_f128
-#define zig_builtin_constant_f128(name) __builtin_##name##f128
-#endif
-
-typedef long double zig_c_longdouble;
-#define zig_suffix_c_longdouble(x) x##l
-#define zig_builtin_c_longdouble(name) zig_suffix_c_longdouble(name)
-#define zig_builtin_constant_c_longdouble(name) zig_suffix_c_longdouble(__builtin_##name)
+#define zig_compiler_rt_abbrev_u32  si
+#define zig_compiler_rt_abbrev_i32  si
+#define zig_compiler_rt_abbrev_u64  di
+#define zig_compiler_rt_abbrev_i64  di
+#define zig_compiler_rt_abbrev_u128 ti
+#define zig_compiler_rt_abbrev_i128 ti
+#define zig_compiler_rt_abbrev_f16  hf
+#define zig_compiler_rt_abbrev_f32  sf
+#define zig_compiler_rt_abbrev_f64  df
+#define zig_compiler_rt_abbrev_f80  xf
+#define zig_compiler_rt_abbrev_f128 tf
 
 zig_extern_c void *memcpy (void *zig_restrict, void const *zig_restrict, zig_usize);
 zig_extern_c void *memset (void *, int, zig_usize);
@@ -361,7 +271,9 @@ zig_extern_c void *memset (void *, int, zig_usize);
 /* ==================== 8/16/32/64-bit Integer Routines ===================== */
 
 #define zig_maxInt(Type, bits) zig_shr_##Type(zig_maxInt_##Type, (zig_bitSizeOf(zig_##Type) - bits))
+#define zig_expand_maxInt(Type, bits) zig_maxInt(Type, bits)
 #define zig_minInt(Type, bits) zig_not_##Type(zig_maxInt(Type, bits), bits)
+#define zig_expand_minInt(Type, bits) zig_minInt(Type, bits)
 
 #define zig_int_operator(Type, RhsType, operation, operator) \
     static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##RhsType rhs) { \
@@ -954,8 +866,8 @@ static inline zig_u16 zig_bit_reverse_u16(zig_u16 val, zig_u8 bits) {
 #if zig_has_builtin(bitreverse16)
     full_res = __builtin_bitreverse16(val);
 #else
-    full_res = (zig_u16)zig_bit_reverse_u8((zig_u8)(val >>  0)) <<  8 |
-               (zig_u16)zig_bit_reverse_u8((zig_u8)(val >>  8)) >>  0;
+    full_res = (zig_u16)zig_bit_reverse_u8((zig_u8)(val >>  0), 8) <<  8 |
+               (zig_u16)zig_bit_reverse_u8((zig_u8)(val >>  8), 8) >>  0;
 #endif
     return zig_wrap_u16(full_res >> (16 - bits), bits);
 }
@@ -969,8 +881,8 @@ static inline zig_u32 zig_bit_reverse_u32(zig_u32 val, zig_u8 bits) {
 #if zig_has_builtin(bitreverse32)
     full_res = __builtin_bitreverse32(val);
 #else
-    full_res = (zig_u32)zig_bit_reverse_u16((zig_u16)(val >>  0)) << 16 |
-               (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 16)) >>  0;
+    full_res = (zig_u32)zig_bit_reverse_u16((zig_u16)(val >>  0), 16) << 16 |
+               (zig_u32)zig_bit_reverse_u16((zig_u16)(val >> 16), 16) >>  0;
 #endif
     return zig_wrap_u32(full_res >> (32 - bits), bits);
 }
@@ -984,8 +896,8 @@ static inline zig_u64 zig_bit_reverse_u64(zig_u64 val, zig_u8 bits) {
 #if zig_has_builtin(bitreverse64)
     full_res = __builtin_bitreverse64(val);
 #else
-    full_res = (zig_u64)zig_bit_reverse_u32((zig_u32)(val >>  0)) << 32 |
-               (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 32)) >>  0;
+    full_res = (zig_u64)zig_bit_reverse_u32((zig_u32)(val >>  0), 32) << 32 |
+               (zig_u64)zig_bit_reverse_u32((zig_u32)(val >> 32), 32) >>  0;
 #endif
     return zig_wrap_u64(full_res >> (64 - bits), bits);
 }
@@ -1019,7 +931,7 @@ typedef   signed __int128 zig_i128;
 #define zig_bitcast_i128(val) ((zig_i128)(val))
 #define zig_cmp_int128(Type) \
     static inline zig_i8 zig_cmp_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return (lhs > rhs) - (lhs < rhs); \
+        return (zig_i8)((lhs > rhs) - (lhs < rhs)); \
     }
 #define zig_bit_int128(Type, operation, operator) \
     static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
@@ -1211,6 +1123,22 @@ static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) {
 #define zig_div_floor_u128 zig_div_trunc_u128
 #define zig_mod_u128 zig_rem_u128
 
+static inline zig_u128 zig_min_u128(zig_u128 lhs, zig_u128 rhs) {
+    return zig_cmp_u128(lhs, rhs) < 0 ? lhs : rhs;
+}
+
+static inline zig_i128 zig_min_i128(zig_i128 lhs, zig_i128 rhs) {
+    return zig_cmp_i128(lhs, rhs) < 0 ? lhs : rhs;
+}
+
+static inline zig_u128 zig_max_u128(zig_u128 lhs, zig_u128 rhs) {
+    return zig_cmp_u128(lhs, rhs) > 0 ? lhs : rhs;
+}
+
+static inline zig_i128 zig_max_i128(zig_i128 lhs, zig_i128 rhs) {
+    return zig_cmp_i128(lhs, rhs) > 0 ? lhs : rhs;
+}
+
 static inline zig_i128 zig_shr_i128(zig_i128 lhs, zig_u8 rhs) {
     zig_i128 sign_mask = zig_cmp_i128(lhs, zig_as_i128(0, 0)) < zig_as_i8(0) ? -zig_as_i128(0, 1) : zig_as_i128(0, 0);
     return zig_xor_i128(zig_bitcast_i128(zig_shr_u128(zig_bitcast_u128(zig_xor_i128(lhs, sign_mask)), rhs)), sign_mask);
@@ -1460,36 +1388,285 @@ static inline zig_i128 zig_bit_reverse_i128(zig_i128 val, zig_u8 bits) {
     return zig_bit_reverse_u128(zig_bitcast_u128(val), bits);
 }
 
-/* ========================== Float Point Routines ========================== */
+/* ========================= Floating Point Support ========================= */
+
+#define zig_has_f16 1
+#define zig_bitSizeOf_f16 16
+#define zig_libc_name_f16(name) __##name##h
+#define zig_as_special_f16(sign, name, arg, repr) sign zig_as_f16(__builtin_##name, )(arg)
+#if FLT_MANT_DIG == 11
+typedef float zig_f16;
+#define zig_as_f16(fp, repr) fp##f
+#elif DBL_MANT_DIG == 11
+typedef double zig_f16;
+#define zig_as_f16(fp, repr) fp
+#elif LDBL_MANT_DIG == 11
+#define zig_bitSizeOf_c_longdouble 16
+typedef long double zig_f16;
+#define zig_as_f16(fp, repr) fp##l
+#elif FLT16_MANT_DIG == 11
+typedef _Float16 zig_f16;
+#define zig_as_f16(fp, repr) fp##f16
+#elif defined(__SIZEOF_FP16__)
+typedef __fp16 zig_f16;
+#define zig_as_f16(fp, repr) fp##f16
+#else
+#undef zig_has_f16
+#define zig_has_f16 0
+#define zig_repr_f16 i16
+typedef zig_i16 zig_f16;
+#define zig_as_f16(fp, repr) repr
+#undef zig_as_special_f16
+#define zig_as_special_f16(sign, name, arg, repr) repr
+#endif
+
+#define zig_has_f32 1
+#define zig_bitSizeOf_f32 32
+#define zig_libc_name_f32(name) name##f
+#define zig_as_special_f32(sign, name, arg, repr) sign zig_as_f32(__builtin_##name, )(arg)
+#if FLT_MANT_DIG == 24
+typedef float zig_f32;
+#define zig_as_f32(fp, repr) fp##f
+#elif DBL_MANT_DIG == 24
+typedef double zig_f32;
+#define zig_as_f32(fp, repr) fp
+#elif LDBL_MANT_DIG == 24
+#define zig_bitSizeOf_c_longdouble 32
+typedef long double zig_f32;
+#define zig_as_f32(fp, repr) fp##l
+#elif FLT32_MANT_DIG == 24
+typedef _Float32 zig_f32;
+#define zig_as_f32(fp, repr) fp##f32
+#else
+#undef zig_has_f32
+#define zig_has_f32 0
+#define zig_repr_f32 i32
+typedef zig_i32 zig_f32;
+#define zig_as_f32(fp, repr) repr
+#undef zig_as_special_f32
+#define zig_as_special_f32(sign, name, arg, repr) repr
+#endif
+
+#define zig_has_f64 1
+#define zig_bitSizeOf_f64 64
+#define zig_libc_name_f64(name) name
+#define zig_as_special_f64(sign, name, arg, repr) sign zig_as_f64(__builtin_##name, )(arg)
+#if FLT_MANT_DIG == 53
+typedef float zig_f64;
+#define zig_as_f64(fp, repr) fp##f
+#elif DBL_MANT_DIG == 53
+typedef double zig_f64;
+#define zig_as_f64(fp, repr) fp
+#elif LDBL_MANT_DIG == 53
+#define zig_bitSizeOf_c_longdouble 64
+typedef long double zig_f64;
+#define zig_as_f64(fp, repr) fp##l
+#elif FLT64_MANT_DIG == 53
+typedef _Float64 zig_f64;
+#define zig_as_f64(fp, repr) fp##f64
+#elif FLT32X_MANT_DIG == 53
+typedef _Float32x zig_f64;
+#define zig_as_f64(fp, repr) fp##f32x
+#else
+#undef zig_has_f64
+#define zig_has_f64 0
+#define zig_repr_f64 i64
+typedef zig_i64 zig_f64;
+#define zig_as_f64(fp, repr) repr
+#undef zig_as_special_f64
+#define zig_as_special_f64(sign, name, arg, repr) repr
+#endif
+
+#define zig_has_f80 1
+#define zig_bitSizeOf_f80 80
+#define zig_libc_name_f80(name) __##name##x
+#define zig_as_special_f80(sign, name, arg, repr) sign zig_as_f80(__builtin_##name, )(arg)
+#if FLT_MANT_DIG == 64
+typedef float zig_f80;
+#define zig_as_f80(fp, repr) fp##f
+#elif DBL_MANT_DIG == 64
+typedef double zig_f80;
+#define zig_as_f80(fp, repr) fp
+#elif LDBL_MANT_DIG == 64
+#define zig_bitSizeOf_c_longdouble 80
+typedef long double zig_f80;
+#define zig_as_f80(fp, repr) fp##l
+#elif FLT80_MANT_DIG == 64
+typedef _Float80 zig_f80;
+#define zig_as_f80(fp, repr) fp##f80
+#elif FLT64X_MANT_DIG == 64
+typedef _Float64x zig_f80;
+#define zig_as_f80(fp, repr) fp##f64x
+#elif defined(__SIZEOF_FLOAT80__)
+typedef __float80 zig_f80;
+#define zig_as_f80(fp, repr) fp##l
+#else
+#undef zig_has_f80
+#define zig_has_f80 0
+#define zig_repr_f80 i128
+typedef zig_i128 zig_f80;
+#define zig_as_f80(fp, repr) repr
+#undef zig_as_special_f80
+#define zig_as_special_f80(sign, name, arg, repr) repr
+#endif
+
+#define zig_has_f128 1
+#define zig_bitSizeOf_f128 128
+#define zig_libc_name_f128(name) name##q
+#define zig_as_special_f128(sign, name, arg, repr) sign zig_as_f128(__builtin_##name, )(arg)
+#if FLT_MANT_DIG == 113
+typedef float zig_f128;
+#define zig_as_f128(fp, repr) fp##f
+#elif DBL_MANT_DIG == 113
+typedef double zig_f128;
+#define zig_as_f128(fp, repr) fp
+#elif LDBL_MANT_DIG == 113
+#define zig_bitSizeOf_c_longdouble 128
+typedef long double zig_f128;
+#define zig_as_f128(fp, repr) fp##l
+#elif FLT128_MANT_DIG == 113
+typedef _Float128 zig_f128;
+#define zig_as_f128(fp, repr) fp##f128
+#elif FLT64X_MANT_DIG == 113
+typedef _Float64x zig_f128;
+#define zig_as_f128(fp, repr) fp##f64x
+#elif defined(__SIZEOF_FLOAT128__)
+typedef __float128 zig_f128;
+#define zig_as_f128(fp, repr) fp##q
+#undef zig_as_special_f128
+#define zig_as_special_f128(sign, name, arg, repr) sign __builtin_##name##f128(arg)
+#else
+#undef zig_has_f128
+#define zig_has_f128 0
+#define zig_repr_f128 i128
+typedef zig_i128 zig_f128;
+#define zig_as_f128(fp, repr) repr
+#undef zig_as_special_f128
+#define zig_as_special_f128(sign, name, arg, repr) repr
+#endif
+
+#define zig_has_c_longdouble 1
+typedef long double zig_c_longdouble;
+#define zig_as_c_longdouble(fp, repr) fp##l
+#define zig_libc_name_c_longdouble(name) name##l
+#define zig_as_special_c_longdouble(sign, name, arg, repr) sign __builtin_##name##l(arg)
+
+#define zig_convert_builtin(ResType, operation, ArgType, version) \
+    zig_extern_c zig_##ResType zig_expand_concat(zig_expand_concat(zig_expand_concat(__##operation, \
+        zig_compiler_rt_abbrev_##ArgType), zig_compiler_rt_abbrev_##ResType), version)(zig_##ArgType);
+zig_convert_builtin(f16,  trunc,  f32,  2)
+zig_convert_builtin(f16,  trunc,  f64,  2)
+zig_convert_builtin(f16,  trunc,  f80,  2)
+zig_convert_builtin(f16,  trunc,  f128, 2)
+zig_convert_builtin(f32,  extend, f16,  2)
+zig_convert_builtin(f32,  trunc,  f64,  2)
+zig_convert_builtin(f32,  trunc,  f80,  2)
+zig_convert_builtin(f32,  trunc,  f128, 2)
+zig_convert_builtin(f64,  extend, f16,  2)
+zig_convert_builtin(f64,  extend, f32,  2)
+zig_convert_builtin(f64,  trunc,  f80,  2)
+zig_convert_builtin(f64,  trunc,  f128, 2)
+zig_convert_builtin(f80,  extend, f16,  2)
+zig_convert_builtin(f80,  extend, f32,  2)
+zig_convert_builtin(f80,  extend, f64,  2)
+zig_convert_builtin(f80,  trunc,  f128, 2)
+zig_convert_builtin(f128, extend, f16,  2)
+zig_convert_builtin(f128, extend, f32,  2)
+zig_convert_builtin(f128, extend, f64,  2)
+zig_convert_builtin(f128, extend, f80,  2)
+
+#define zig_float_negate_builtin_0(Type) \
+    static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \
+        return zig_expand_concat(zig_xor_, zig_repr_##Type)(arg, zig_expand_minInt(zig_repr_##Type, zig_bitSizeOf_##Type)); \
+    }
+#define zig_float_negate_builtin_1(Type) \
+    static inline zig_##Type zig_neg_##Type(zig_##Type arg) { \
+        return -arg; \
+    }
+
+#define zig_float_less_builtin_0(Type, operation) \
+    zig_extern_c zig_i8 zig_expand_concat(zig_expand_concat(__##operation, \
+        zig_compiler_rt_abbrev_##Type), 2)(zig_##Type, zig_##Type); \
+    static inline zig_i8 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return (zig_i8)zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 2)(lhs, rhs); \
+    }
+#define zig_float_less_builtin_1(Type, operation) \
+    static inline zig_i8 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return (zig_i8)(!(lhs <= rhs) - (lhs < rhs)); \
+    }
+
+#define zig_float_greater_builtin_0(Type, operation) \
+    zig_float_less_builtin_0(Type, operation)
+#define zig_float_greater_builtin_1(Type, operation) \
+    static inline zig_i8 zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return (zig_i8)((lhs > rhs) - !(lhs >= rhs)); \
+    }
+
+#define zig_float_binary_builtin_0(Type, operation, operator) \
+    zig_extern_c zig_##Type zig_expand_concat(zig_expand_concat(__##operation, \
+        zig_compiler_rt_abbrev_##Type), 3)(zig_##Type, zig_##Type); \
+    static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return zig_expand_concat(zig_expand_concat(__##operation, zig_compiler_rt_abbrev_##Type), 3)(lhs, rhs); \
+    }
+#define zig_float_binary_builtin_1(Type, operation, operator) \
+    static inline zig_##Type zig_##operation##_##Type(zig_##Type lhs, zig_##Type rhs) { \
+        return lhs operator rhs; \
+    }
 
 #define zig_float_builtins(Type) \
-    zig_extern_c zig_##Type zig_builtin_##Type(sqrt)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(sin)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(cos)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(tan)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(exp)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(exp2)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(log)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(log2)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(log10)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(fabs)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(floor)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(ceil)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(round)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(trunc)(zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(fmod)(zig_##Type, zig_##Type); \
-    zig_extern_c zig_##Type zig_builtin_##Type(fma)(zig_##Type, zig_##Type, zig_##Type); \
+    zig_convert_builtin(i32,  fix,     Type, ) \
+    zig_convert_builtin(u32,  fixuns,  Type, ) \
+    zig_convert_builtin(i64,  fix,     Type, ) \
+    zig_convert_builtin(u64,  fixuns,  Type, ) \
+    zig_convert_builtin(i128, fix,     Type, ) \
+    zig_convert_builtin(u128, fixuns,  Type, ) \
+    zig_convert_builtin(Type, float,   i32,  ) \
+    zig_convert_builtin(Type, floatun, u32,  ) \
+    zig_convert_builtin(Type, float,   i64,  ) \
+    zig_convert_builtin(Type, floatun, u64,  ) \
+    zig_convert_builtin(Type, float,   i128, ) \
+    zig_convert_builtin(Type, floatun, u128, ) \
+    zig_expand_concat(zig_float_negate_builtin_,  zig_has_##Type)(Type) \
+    zig_expand_concat(zig_float_less_builtin_,    zig_has_##Type)(Type, cmp) \
+    zig_expand_concat(zig_float_less_builtin_,    zig_has_##Type)(Type, ne) \
+    zig_expand_concat(zig_float_less_builtin_,    zig_has_##Type)(Type, eq) \
+    zig_expand_concat(zig_float_less_builtin_,    zig_has_##Type)(Type, lt) \
+    zig_expand_concat(zig_float_less_builtin_,    zig_has_##Type)(Type, le) \
+    zig_expand_concat(zig_float_greater_builtin_, zig_has_##Type)(Type, gt) \
+    zig_expand_concat(zig_float_greater_builtin_, zig_has_##Type)(Type, ge) \
+    zig_expand_concat(zig_float_binary_builtin_,  zig_has_##Type)(Type, add, +) \
+    zig_expand_concat(zig_float_binary_builtin_,  zig_has_##Type)(Type, sub, -) \
+    zig_expand_concat(zig_float_binary_builtin_,  zig_has_##Type)(Type, mul, *) \
+    zig_expand_concat(zig_float_binary_builtin_,  zig_has_##Type)(Type, div, /) \
+    zig_extern_c zig_##Type zig_libc_name_##Type(sqrt)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(sin)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(cos)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(tan)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(exp)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(exp2)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(log)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(log2)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(log10)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(fabs)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(floor)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(ceil)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(round)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(trunc)(zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(fmod)(zig_##Type, zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(fmin)(zig_##Type, zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(fmax)(zig_##Type, zig_##Type); \
+    zig_extern_c zig_##Type zig_libc_name_##Type(fma)(zig_##Type, zig_##Type, zig_##Type); \
 \
     static inline zig_##Type zig_div_trunc_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return zig_builtin_##Type(trunc)(lhs / rhs); \
+        return zig_libc_name_##Type(trunc)(zig_div_##Type(lhs, rhs)); \
     } \
 \
     static inline zig_##Type zig_div_floor_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return zig_builtin_##Type(floor)(lhs / rhs); \
+        return zig_libc_name_##Type(floor)(zig_div_##Type(lhs, rhs)); \
     } \
 \
     static inline zig_##Type zig_mod_##Type(zig_##Type lhs, zig_##Type rhs) { \
-        return lhs - zig_div_floor_##Type(lhs, rhs) * rhs; \
+        return zig_sub_##Type(lhs, zig_mul_##Type(zig_div_floor_##Type(lhs, rhs), rhs)); \
     }
 zig_float_builtins(f16)
 zig_float_builtins(f32)
lib/std/start.zig
@@ -281,7 +281,15 @@ fn _start() callconv(.Naked) noreturn {
                     \\ andl $-16, %%esp
                     \\ jmp _posixCallMainAndExit
                 ),
-                .aarch64, .aarch64_be, .arm, .armeb, .thumb => asm volatile (
+                .aarch64, .aarch64_be => asm volatile (
+                    \\ mov fp, #0
+                    \\ mov lr, #0
+                    \\ mov x0, sp
+                    \\ adrp x1, argc_argv_ptr
+                    \\ str x0, [x1, :lo12:argc_argv_ptr]
+                    \\ b _posixCallMainAndExit
+                ),
+                .arm, .armeb, .thumb => asm volatile (
                     \\ mov fp, #0
                     \\ mov lr, #0
                     \\ str sp, argc_argv_ptr
lib/test_runner.zig
@@ -130,7 +130,8 @@ pub fn main2() anyerror!void {
     }
     if (builtin.zig_backend == .stage2_wasm or
         builtin.zig_backend == .stage2_x86_64 or
-        builtin.zig_backend == .stage2_llvm)
+        builtin.zig_backend == .stage2_llvm or
+        builtin.zig_backend == .stage2_c)
     {
         const passed = builtin.test_functions.len - skipped - failed;
         const stderr = std.io.getStdErr();
src/codegen/c.zig
@@ -19,6 +19,7 @@ const Liveness = @import("../Liveness.zig");
 const CType = @import("../type.zig").CType;
 
 const Mutability = enum { Const, ConstArgument, Mut };
+const BigIntLimb = std.math.big.Limb;
 const BigInt = std.math.big.int;
 
 pub const CValue = union(enum) {
@@ -608,19 +609,25 @@ pub const DeclGen = struct {
                 .Bool => return dg.renderValue(writer, ty, Value.@"false", location),
                 .Int, .Enum, .ErrorSet => return writer.print("{x}", .{try dg.fmtIntLiteral(ty, val)}),
                 .Float => {
+                    const bits = ty.floatBits(target);
+                    var int_pl = Type.Payload.Bits{ .base = .{ .tag = .int_signed }, .data = bits };
+                    const int_ty = Type.initPayload(&int_pl.base);
+
                     try writer.writeByte('(');
                     try dg.renderTypecast(writer, ty);
-                    try writer.writeAll(")zig_suffix_");
+                    try writer.writeAll(")zig_as_");
                     try dg.renderTypeForBuiltinFnName(writer, ty);
                     try writer.writeByte('(');
-                    switch (ty.floatBits(target)) {
-                        16 => try writer.print("{x}", .{@bitCast(f16, undefPattern(u16))}),
-                        32 => try writer.print("{x}", .{@bitCast(f32, undefPattern(u32))}),
-                        64 => try writer.print("{x}", .{@bitCast(f64, undefPattern(u64))}),
-                        80 => try writer.print("{x}", .{@bitCast(f80, undefPattern(u80))}),
-                        128 => try writer.print("{x}", .{@bitCast(f128, undefPattern(u128))}),
+                    switch (bits) {
+                        16 => try writer.print("{x}", .{@bitCast(f16, undefPattern(i16))}),
+                        32 => try writer.print("{x}", .{@bitCast(f32, undefPattern(i32))}),
+                        64 => try writer.print("{x}", .{@bitCast(f64, undefPattern(i64))}),
+                        80 => try writer.print("{x}", .{@bitCast(f80, undefPattern(i80))}),
+                        128 => try writer.print("{x}", .{@bitCast(f128, undefPattern(i128))}),
                         else => unreachable,
                     }
+                    try writer.writeAll(", ");
+                    try dg.renderValue(writer, int_ty, Value.undef, .FunctionArgument);
                     return writer.writeByte(')');
                 },
                 .Pointer => if (ty.isSlice()) {
@@ -770,21 +777,48 @@ pub const DeclGen = struct {
                 else => try writer.print("{}", .{try dg.fmtIntLiteral(ty, val)}),
             },
             .Float => {
+                const bits = ty.floatBits(target);
+                const f128_val = val.toFloat(f128);
+
+                var int_ty_pl = Type.Payload.Bits{ .base = .{ .tag = .int_signed }, .data = bits };
+                const int_ty = Type.initPayload(&int_ty_pl.base);
+
+                assert(bits <= 128);
+                var int_val_limbs: [BigInt.calcTwosCompLimbCount(128)]BigIntLimb = undefined;
+                var int_val_big = BigInt.Mutable{
+                    .limbs = &int_val_limbs,
+                    .len = undefined,
+                    .positive = undefined,
+                };
+
+                switch (bits) {
+                    16 => int_val_big.set(@bitCast(i16, val.toFloat(f16))),
+                    32 => int_val_big.set(@bitCast(i32, val.toFloat(f32))),
+                    64 => int_val_big.set(@bitCast(i64, val.toFloat(f64))),
+                    80 => int_val_big.set(@bitCast(i80, val.toFloat(f80))),
+                    128 => int_val_big.set(@bitCast(i128, f128_val)),
+                    else => unreachable,
+                }
+
+                var int_val_pl = Value.Payload.BigInt{
+                    .base = .{ .tag = if (int_val_big.positive) .int_big_positive else .int_big_negative },
+                    .data = int_val_big.limbs[0..int_val_big.len],
+                };
+                const int_val = Value.initPayload(&int_val_pl.base);
+
                 try writer.writeByte('(');
                 try dg.renderTypecast(writer, ty);
                 try writer.writeByte(')');
-                const f128_val = val.toFloat(f128);
-                if (std.math.signbit(f128_val)) try writer.writeByte('-');
                 if (std.math.isFinite(f128_val)) {
-                    try writer.writeAll("zig_suffix_");
+                    try writer.writeAll("zig_as_");
                     try dg.renderTypeForBuiltinFnName(writer, ty);
                     try writer.writeByte('(');
-                    switch (ty.floatBits(target)) {
-                        16 => try writer.print("{x}", .{@fabs(val.toFloat(f16))}),
-                        32 => try writer.print("{x}", .{@fabs(val.toFloat(f32))}),
-                        64 => try writer.print("{x}", .{@fabs(val.toFloat(f64))}),
-                        80 => try writer.print("{x}", .{@fabs(val.toFloat(f80))}),
-                        128 => try writer.print("{x}", .{@fabs(f128_val)}),
+                    switch (bits) {
+                        16 => try writer.print("{x}", .{val.toFloat(f16)}),
+                        32 => try writer.print("{x}", .{val.toFloat(f32)}),
+                        64 => try writer.print("{x}", .{val.toFloat(f64)}),
+                        80 => try writer.print("{x}", .{val.toFloat(f80)}),
+                        128 => try writer.print("{x}", .{f128_val}),
                         else => unreachable,
                     }
                 } else {
@@ -796,23 +830,26 @@ pub const DeclGen = struct {
                         "inf"
                     else
                         unreachable;
-                    try writer.writeAll("zig_builtin_constant_");
+
+                    try writer.writeAll("zig_as_special_");
                     try dg.renderTypeForBuiltinFnName(writer, ty);
                     try writer.writeByte('(');
+                    if (std.math.signbit(f128_val)) try writer.writeByte('-');
+                    try writer.writeAll(", ");
                     try writer.writeAll(operation);
-                    try writer.writeAll(")(");
-                    if (std.math.isNan(f128_val)) switch (ty.floatBits(target)) {
+                    try writer.writeAll(", ");
+                    if (std.math.isNan(f128_val)) switch (bits) {
                         // We only actually need to pass the significand, but it will get
                         // properly masked anyway, so just pass the whole value.
-                        16 => try writer.print("\"0x{x}\"", .{@bitCast(u16, @fabs(val.toFloat(f16)))}),
-                        32 => try writer.print("\"0x{x}\"", .{@bitCast(u32, @fabs(val.toFloat(f32)))}),
-                        64 => try writer.print("\"0x{x}\"", .{@bitCast(u64, @fabs(val.toFloat(f64)))}),
-                        80 => try writer.print("\"0x{x}\"", .{@bitCast(u80, @fabs(val.toFloat(f80)))}),
-                        128 => try writer.print("\"0x{x}\"", .{@bitCast(u128, @fabs(f128_val))}),
+                        16 => try writer.print("\"0x{x}\"", .{@bitCast(u16, val.toFloat(f16))}),
+                        32 => try writer.print("\"0x{x}\"", .{@bitCast(u32, val.toFloat(f32))}),
+                        64 => try writer.print("\"0x{x}\"", .{@bitCast(u64, val.toFloat(f64))}),
+                        80 => try writer.print("\"0x{x}\"", .{@bitCast(u80, val.toFloat(f80))}),
+                        128 => try writer.print("\"0x{x}\"", .{@bitCast(u128, f128_val)}),
                         else => unreachable,
                     };
                 }
-                return writer.writeByte(')');
+                return writer.print(", {x})", .{try dg.fmtIntLiteral(int_ty, int_val)});
             },
             .Pointer => switch (val.tag()) {
                 .null_value, .zero => if (ty.isSlice()) {
@@ -2299,11 +2336,14 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
 
             // TODO use a different strategy for add, sub, mul, div
             // that communicates to the optimizer that wrapping is UB.
-            .add                   => try airBinOp(f, inst, "+", "add", .None),
-            .sub                   => try airBinOp(f, inst, "-", "sub", .None),
-            .mul                   => try airBinOp(f, inst, "*", "mul", .None),
-            .div_float, .div_exact => try airBinOp(f, inst, "/", "div_trunc", .None),
+            .add => try airBinOp(f, inst, "+", "add", .None),
+            .sub => try airBinOp(f, inst, "-", "sub", .None),
+            .mul => try airBinOp(f, inst, "*", "mul", .None),
+
+            .neg => try airFloatNeg(f, inst),
+            .div_float => try airBinBuiltinCall(f, inst, "div", .None),
 
+            .div_trunc, .div_exact => try airBinOp(f, inst, "/", "div_trunc", .None),
             .rem => blk: {
                 const bin_op = f.air.instructions.items(.data)[inst].bin_op;
                 const lhs_ty = f.air.typeOf(bin_op.lhs);
@@ -2314,16 +2354,6 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
                 else
                     try airBinFloatOp(f, inst, "fmod");
             },
-            .div_trunc => blk: {
-                const bin_op = f.air.instructions.items(.data)[inst].bin_op;
-                const lhs_ty = f.air.typeOf(bin_op.lhs);
-                // For binary operations @TypeOf(lhs)==@TypeOf(rhs),
-                // so we only check one.
-                break :blk if (lhs_ty.isInt())
-                    try airBinOp(f, inst, "/", "div_trunc", .None)
-                else
-                    try airBinBuiltinCall(f, inst, "div_trunc", .None);
-            },
             .div_floor => try airBinBuiltinCall(f, inst, "div_floor", .None),
             .mod       => try airBinBuiltinCall(f, inst, "mod", .None),
 
@@ -2336,8 +2366,6 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .mul_sat => try airBinBuiltinCall(f, inst, "muls", .Bits),
             .shl_sat => try airBinBuiltinCall(f, inst, "shls", .Bits),
 
-            .neg => try airNeg(f, inst),
-
             .sqrt,
             .sin,
             .cos,
@@ -2361,18 +2389,18 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .mul_with_overflow => try airOverflow(f, inst, "mul", .Bits),
             .shl_with_overflow => try airOverflow(f, inst, "shl", .Bits),
 
-            .min => try airMinMax(f, inst, '<'),
-            .max => try airMinMax(f, inst, '>'),
+            .min => try airMinMax(f, inst, '<', "fmin"),
+            .max => try airMinMax(f, inst, '>', "fmax"),
 
             .slice => try airSlice(f, inst),
 
-            .cmp_gt  => try airCmpOp(f, inst, ">"),
-            .cmp_gte => try airCmpOp(f, inst, ">="),
-            .cmp_lt  => try airCmpOp(f, inst, "<"),
-            .cmp_lte => try airCmpOp(f, inst, "<="),
+            .cmp_gt  => try airCmpOp(f, inst, ">",  "gt"),
+            .cmp_gte => try airCmpOp(f, inst, ">=", "ge"),
+            .cmp_lt  => try airCmpOp(f, inst, "<",  "lt"),
+            .cmp_lte => try airCmpOp(f, inst, "<=", "le"),
 
-            .cmp_eq  => try airEquality(f, inst, "((", "=="),
-            .cmp_neq => try airEquality(f, inst, "!((", "!="),
+            .cmp_eq  => try airEquality(f, inst,  "((", "==", "eq"),
+            .cmp_neq => try airEquality(f, inst, "!((", "!=", "ne"),
 
             .cmp_vector => return f.fail("TODO: C backend: implement cmp_vector", .{}),
             .cmp_lt_errors_len => try airCmpLtErrorsLen(f, inst),
@@ -2468,7 +2496,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .float_to_int,
             .fptrunc,
             .fpext,
-            => try airSimpleCast(f, inst),
+            => try airFloatCast(f, inst),
 
             .ptrtoint => try airPtrToInt(f, inst),
 
@@ -2973,8 +3001,7 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue {
 
         const src_bits = src_ty.bitSize(target);
 
-        const Limb = std.math.big.Limb;
-        const ExpectedContents = [BigInt.Managed.default_capacity]Limb;
+        const ExpectedContents = [BigInt.Managed.default_capacity]BigIntLimb;
         var stack align(@alignOf(ExpectedContents)) =
             std.heap.stackFallback(@sizeOf(ExpectedContents), f.object.dg.gpa);
 
@@ -3081,7 +3108,7 @@ fn airBinOp(
 
     const operand_ty = f.air.typeOf(bin_op.lhs);
     const target = f.object.dg.module.getTarget();
-    if (operand_ty.isInt() and operand_ty.bitSize(target) > 64)
+    if ((operand_ty.isInt() and operand_ty.bitSize(target) > 64) or operand_ty.isRuntimeFloat())
         return try airBinBuiltinCall(f, inst, operation, info);
 
     const inst_ty = f.air.typeOfIndex(inst);
@@ -3102,7 +3129,7 @@ fn airBinOp(
     return local;
 }
 
-fn airCmpOp(f: *Function, inst: Air.Inst.Index, operator: []const u8) !CValue {
+fn airCmpOp(f: *Function, inst: Air.Inst.Index, operator: []const u8, operation: []const u8) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
@@ -3110,7 +3137,9 @@ fn airCmpOp(f: *Function, inst: Air.Inst.Index, operator: []const u8) !CValue {
     const operand_ty = f.air.typeOf(bin_op.lhs);
     const target = f.object.dg.module.getTarget();
     if (operand_ty.isInt() and operand_ty.bitSize(target) > 64)
-        return try airCmpBuiltinCall(f, inst, operator);
+        return try airCmpBuiltinCall(f, inst, operator, "cmp");
+    if (operand_ty.isRuntimeFloat())
+        return try airCmpBuiltinCall(f, inst, operator, operation);
 
     const inst_ty = f.air.typeOfIndex(inst);
     const lhs = try f.resolveInst(bin_op.lhs);
@@ -3134,11 +3163,20 @@ fn airEquality(
     f: *Function,
     inst: Air.Inst.Index,
     negate_prefix: []const u8,
-    eq_op_str: []const u8,
+    operator: []const u8,
+    operation: []const u8,
 ) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
+
+    const operand_ty = f.air.typeOf(bin_op.lhs);
+    const target = f.object.dg.module.getTarget();
+    if (operand_ty.isInt() and operand_ty.bitSize(target) > 64)
+        return try airCmpBuiltinCall(f, inst, operator, "cmp");
+    if (operand_ty.isRuntimeFloat())
+        return try airCmpBuiltinCall(f, inst, operator, operation);
+
     const lhs = try f.resolveInst(bin_op.lhs);
     const rhs = try f.resolveInst(bin_op.rhs);
 
@@ -3148,8 +3186,7 @@ fn airEquality(
 
     try writer.writeAll(" = ");
 
-    const lhs_ty = f.air.typeOf(bin_op.lhs);
-    if (lhs_ty.tag() == .optional) {
+    if (operand_ty.tag() == .optional) {
         // (A && B)  || (C && (A == B))
         // A = lhs.is_null  ;  B = rhs.is_null  ;  C = rhs.payload == lhs.payload
 
@@ -3172,7 +3209,7 @@ fn airEquality(
 
     try f.writeCValue(writer, lhs, .Other);
     try writer.writeByte(' ');
-    try writer.writeAll(eq_op_str);
+    try writer.writeAll(operator);
     try writer.writeByte(' ');
     try f.writeCValue(writer, rhs, .Other);
     try writer.writeAll(";\n");
@@ -3231,15 +3268,22 @@ fn airPtrAddSub(f: *Function, inst: Air.Inst.Index, operator: u8) !CValue {
     return local;
 }
 
-fn airMinMax(f: *Function, inst: Air.Inst.Index, operator: u8) !CValue {
+fn airMinMax(f: *Function, inst: Air.Inst.Index, operator: u8, operation: []const u8) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
     const bin_op = f.air.instructions.items(.data)[inst].bin_op;
+
+    const inst_ty = f.air.typeOfIndex(inst);
+    const target = f.object.dg.module.getTarget();
+    if (inst_ty.isInt() and inst_ty.bitSize(target) > 64)
+        return try airBinBuiltinCall(f, inst, operation[1..], .None);
+    if (inst_ty.isRuntimeFloat())
+        return try airBinFloatOp(f, inst, operation);
+
     const lhs = try f.resolveInst(bin_op.lhs);
     const rhs = try f.resolveInst(bin_op.rhs);
 
     const writer = f.object.writer();
-    const inst_ty = f.air.typeOfIndex(inst);
     const local = try f.allocLocal(inst_ty, .Const);
 
     // (lhs <> rhs) ? lhs : rhs
@@ -4518,19 +4562,44 @@ fn airArrayToSlice(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-/// Emits a local variable with the result type and initializes it
-/// with the operand.
-fn airSimpleCast(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airFloatCast(f: *Function, inst: Air.Inst.Index) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
     const inst_ty = f.air.typeOfIndex(inst);
-    const local = try f.allocLocal(inst_ty, .Const);
     const ty_op = f.air.instructions.items(.data)[inst].ty_op;
-    const writer = f.object.writer();
     const operand = try f.resolveInst(ty_op.operand);
+    const operand_ty = f.air.typeOf(ty_op.operand);
+    const target = f.object.dg.module.getTarget();
+    const operation = if (inst_ty.isRuntimeFloat() and operand_ty.isRuntimeFloat())
+        if (inst_ty.floatBits(target) < operand_ty.floatBits(target)) "trunc" else "extend"
+    else if (inst_ty.isInt() and operand_ty.isRuntimeFloat())
+        if (inst_ty.isSignedInt()) "fix" else "fixuns"
+    else if (inst_ty.isRuntimeFloat() and operand_ty.isInt())
+        if (operand_ty.isSignedInt()) "float" else "floatun"
+    else
+        unreachable;
+
+    const local = try f.allocLocal(inst_ty, .Const);
+    const writer = f.object.writer();
 
     try writer.writeAll(" = ");
-    try f.writeCValue(writer, operand, .Other);
+    if (inst_ty.isInt() and operand_ty.isRuntimeFloat()) {
+        try writer.writeAll("zig_wrap_");
+        try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty);
+        try writer.writeByte('(');
+    }
+    try writer.writeAll("__");
+    try writer.writeAll(operation);
+    try writer.writeAll(compilerRtAbbrev(operand_ty, target));
+    try writer.writeAll(compilerRtAbbrev(inst_ty, target));
+    if (inst_ty.isRuntimeFloat() and operand_ty.isRuntimeFloat()) try writer.writeByte('2');
+    try writer.writeByte('(');
+    try f.writeCValue(writer, operand, .FunctionArgument);
+    try writer.writeByte(')');
+    if (inst_ty.isInt() and operand_ty.isRuntimeFloat()) {
+        try f.object.dg.renderBuiltinInfo(writer, inst_ty, .Bits);
+        try writer.writeByte(')');
+    }
     try writer.writeAll(";\n");
     return local;
 }
@@ -4604,7 +4673,12 @@ fn airBinBuiltinCall(
     return local;
 }
 
-fn airCmpBuiltinCall(f: *Function, inst: Air.Inst.Index, operator: []const u8) !CValue {
+fn airCmpBuiltinCall(
+    f: *Function,
+    inst: Air.Inst.Index,
+    operator: []const u8,
+    operation: []const u8,
+) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
     const inst_ty = f.air.typeOfIndex(inst);
@@ -4613,7 +4687,9 @@ fn airCmpBuiltinCall(f: *Function, inst: Air.Inst.Index, operator: []const u8) !
 
     const local = try f.allocLocal(inst_ty, .Const);
     const writer = f.object.writer();
-    try writer.writeAll(" = zig_cmp_");
+    try writer.writeAll(" = zig_");
+    try writer.writeAll(operation);
+    try writer.writeByte('_');
     try f.object.dg.renderTypeForBuiltinFnName(writer, operand_ty);
     try writer.writeByte('(');
     try f.writeCValue(writer, try f.resolveInst(bin_op.lhs), .FunctionArgument);
@@ -5157,17 +5233,21 @@ fn airWasmMemoryGrow(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
-fn airNeg(f: *Function, inst: Air.Inst.Index) !CValue {
+fn airFloatNeg(f: *Function, inst: Air.Inst.Index) !CValue {
     if (f.liveness.isUnused(inst)) return CValue.none;
 
-    const un_op = f.air.instructions.items(.data)[inst].un_op;
-    const writer = f.object.writer();
     const inst_ty = f.air.typeOfIndex(inst);
+    const un_op = f.air.instructions.items(.data)[inst].un_op;
     const operand = try f.resolveInst(un_op);
+    const operand_ty = f.air.typeOf(un_op);
+
     const local = try f.allocLocal(inst_ty, .Const);
-    try writer.writeAll(" = -");
-    try f.writeCValue(writer, operand, .Other);
-    try writer.writeAll(";\n");
+    const writer = f.object.writer();
+    try writer.writeAll(" = zig_neg_");
+    try f.object.dg.renderTypeForBuiltinFnName(writer, operand_ty);
+    try writer.writeByte('(');
+    try f.writeCValue(writer, operand, .FunctionArgument);
+    try writer.writeAll(");\n");
     return local;
 }
 
@@ -5178,7 +5258,7 @@ fn airUnFloatOp(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CVal
     const inst_ty = f.air.typeOfIndex(inst);
     const operand = try f.resolveInst(un_op);
     const local = try f.allocLocal(inst_ty, .Const);
-    try writer.writeAll(" = zig_builtin_");
+    try writer.writeAll(" = zig_libc_name_");
     try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty);
     try writer.writeByte('(');
     try writer.writeAll(operation);
@@ -5196,7 +5276,7 @@ fn airBinFloatOp(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CVa
     const lhs = try f.resolveInst(bin_op.lhs);
     const rhs = try f.resolveInst(bin_op.rhs);
     const local = try f.allocLocal(inst_ty, .Const);
-    try writer.writeAll(" = zig_builtin_");
+    try writer.writeAll(" = zig_libc_name_");
     try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty);
     try writer.writeByte('(');
     try writer.writeAll(operation);
@@ -5218,7 +5298,7 @@ fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue {
     const addend = try f.resolveInst(pl_op.operand);
     const writer = f.object.writer();
     const local = try f.allocLocal(inst_ty, .Const);
-    try writer.writeAll(" = zig_builtin_");
+    try writer.writeAll(" = zig_libc_name_");
     try f.object.dg.renderTypeForBuiltinFnName(writer, inst_ty);
     try writer.writeAll("(fma)(");
     try f.writeCValue(writer, mulend1, .FunctionArgument);
@@ -5328,6 +5408,22 @@ fn signAbbrev(signedness: std.builtin.Signedness) u8 {
     };
 }
 
+fn compilerRtAbbrev(ty: Type, target: std.Target) []const u8 {
+    return if (ty.isInt()) switch (ty.intInfo(target).bits) {
+        1...32 => "si",
+        33...64 => "di",
+        65...128 => "ti",
+        else => unreachable,
+    } else if (ty.isRuntimeFloat()) switch (ty.floatBits(target)) {
+        16 => "hf",
+        32 => "sf",
+        64 => "df",
+        80 => "xf",
+        128 => "tf",
+        else => unreachable,
+    } else unreachable;
+}
+
 fn formatStringLiteral(
     str: []const u8,
     comptime fmt: []const u8,
@@ -5356,8 +5452,10 @@ fn fmtStringLiteral(str: []const u8) std.fmt.Formatter(formatStringLiteral) {
     return .{ .data = str };
 }
 
-fn undefPattern(comptime T: type) T {
-    return (1 << (@bitSizeOf(T) | 1)) / 3;
+fn undefPattern(comptime IntType: type) IntType {
+    const int_info = @typeInfo(IntType).Int;
+    const UnsignedType = std.meta.Int(.unsigned, int_info.bits);
+    return @bitCast(IntType, @as(UnsignedType, (1 << (int_info.bits | 1)) / 3));
 }
 
 const FormatIntLiteralContext = struct {
@@ -5374,30 +5472,29 @@ fn formatIntLiteral(
     const target = data.mod.getTarget();
     const int_info = data.ty.intInfo(target);
 
-    const Limb = std.math.big.Limb;
     const ExpectedContents = struct {
         const base = 10;
         const limbs_count_128 = BigInt.calcTwosCompLimbCount(128);
         const expected_needed_limbs_count = BigInt.calcToStringLimbsBufferLen(limbs_count_128, base);
         const worst_case_int = BigInt.Const{
-            .limbs = &([1]Limb{std.math.maxInt(Limb)} ** expected_needed_limbs_count),
+            .limbs = &([1]BigIntLimb{std.math.maxInt(BigIntLimb)} ** expected_needed_limbs_count),
             .positive = false,
         };
 
-        undef_limbs: [limbs_count_128]Limb,
-        wrap_limbs: [limbs_count_128]Limb,
+        undef_limbs: [limbs_count_128]BigIntLimb,
+        wrap_limbs: [limbs_count_128]BigIntLimb,
     };
     var stack align(@alignOf(ExpectedContents)) =
         std.heap.stackFallback(@sizeOf(ExpectedContents), data.mod.gpa);
     const allocator = stack.get();
 
-    var undef_limbs: []Limb = &.{};
+    var undef_limbs: []BigIntLimb = &.{};
     defer allocator.free(undef_limbs);
 
     var int_buf: Value.BigIntSpace = undefined;
     const int = if (data.val.isUndefDeep()) blk: {
-        undef_limbs = try allocator.alloc(Limb, BigInt.calcTwosCompLimbCount(int_info.bits));
-        std.mem.set(Limb, undef_limbs, undefPattern(Limb));
+        undef_limbs = try allocator.alloc(BigIntLimb, BigInt.calcTwosCompLimbCount(int_info.bits));
+        std.mem.set(BigIntLimb, undef_limbs, undefPattern(BigIntLimb));
 
         var undef_int = BigInt.Mutable{
             .limbs = undef_limbs,
@@ -5410,10 +5507,10 @@ fn formatIntLiteral(
     assert(int.fitsInTwosComp(int_info.signedness, int_info.bits));
 
     const c_bits = toCIntBits(int_info.bits) orelse unreachable;
-    var one_limbs: [BigInt.calcLimbLen(1)]Limb = undefined;
+    var one_limbs: [BigInt.calcLimbLen(1)]BigIntLimb = undefined;
     const one = BigInt.Mutable.init(&one_limbs, 1).toConst();
 
-    const wrap_limbs = try allocator.alloc(Limb, BigInt.calcTwosCompLimbCount(c_bits));
+    const wrap_limbs = try allocator.alloc(BigIntLimb, BigInt.calcTwosCompLimbCount(c_bits));
     defer allocator.free(wrap_limbs);
     var wrap = BigInt.Mutable{ .limbs = wrap_limbs, .len = undefined, .positive = undefined };
     if (wrap.addWrap(int, one, int_info.signedness, c_bits) or
@@ -5439,7 +5536,7 @@ fn formatIntLiteral(
         else => try writer.print("zig_as_{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }),
     }
 
-    const limbs_count_64 = @divExact(64, @bitSizeOf(Limb));
+    const limbs_count_64 = @divExact(64, @bitSizeOf(BigIntLimb));
     if (c_bits <= 64) {
         var base: u8 = undefined;
         var case: std.fmt.Case = undefined;
@@ -5471,7 +5568,7 @@ fn formatIntLiteral(
         }
 
         var str: [64]u8 = undefined;
-        var limbs_buf: [BigInt.calcToStringLimbsBufferLen(limbs_count_64, 10)]Limb = undefined;
+        var limbs_buf: [BigInt.calcToStringLimbsBufferLen(limbs_count_64, 10)]BigIntLimb = undefined;
         try writer.writeAll(str[0..int.abs().toString(&str, base, case, &limbs_buf)]);
     } else {
         assert(c_bits == 128);
test/tests.zig
@@ -678,10 +678,6 @@ pub fn addPkgTests(
                 // https://github.com/ziglang/zig/issues/12415
                 continue;
             }
-            if (backend == .stage2_c and builtin.cpu.arch == .aarch64) {
-                // https://github.com/ziglang/zig/issues/13357
-                continue;
-            }
         }
 
         const libc_prefix = if (test_target.target.getOs().requiresLibC())