1
Fork 0

Add f16 and f128 inline ASM support for x86 and x86-64

This commit is contained in:
beetrees 2024-06-13 16:09:45 +01:00
parent 9fdbfe1441
commit dfc5514527
No known key found for this signature in database
GPG key ID: 8791BD754191EBD6
6 changed files with 350 additions and 42 deletions

View file

@ -707,15 +707,19 @@ pub enum InlineAsmType {
I32,
I64,
I128,
F16,
F32,
F64,
F128,
VecI8(u64),
VecI16(u64),
VecI32(u64),
VecI64(u64),
VecI128(u64),
VecF16(u64),
VecF32(u64),
VecF64(u64),
VecF128(u64),
}
impl InlineAsmType {
@ -730,15 +734,19 @@ impl InlineAsmType {
Self::I32 => 4,
Self::I64 => 8,
Self::I128 => 16,
Self::F16 => 2,
Self::F32 => 4,
Self::F64 => 8,
Self::F128 => 16,
Self::VecI8(n) => n * 1,
Self::VecI16(n) => n * 2,
Self::VecI32(n) => n * 4,
Self::VecI64(n) => n * 8,
Self::VecI128(n) => n * 16,
Self::VecF16(n) => n * 2,
Self::VecF32(n) => n * 4,
Self::VecF64(n) => n * 8,
Self::VecF128(n) => n * 16,
})
}
}
@ -751,15 +759,19 @@ impl fmt::Display for InlineAsmType {
Self::I32 => f.write_str("i32"),
Self::I64 => f.write_str("i64"),
Self::I128 => f.write_str("i128"),
Self::F16 => f.write_str("f16"),
Self::F32 => f.write_str("f32"),
Self::F64 => f.write_str("f64"),
Self::F128 => f.write_str("f128"),
Self::VecI8(n) => write!(f, "i8x{n}"),
Self::VecI16(n) => write!(f, "i16x{n}"),
Self::VecI32(n) => write!(f, "i32x{n}"),
Self::VecI64(n) => write!(f, "i64x{n}"),
Self::VecI128(n) => write!(f, "i128x{n}"),
Self::VecF16(n) => write!(f, "f16x{n}"),
Self::VecF32(n) => write!(f, "f32x{n}"),
Self::VecF64(n) => write!(f, "f64x{n}"),
Self::VecF128(n) => write!(f, "f128x{n}"),
}
}
}

View file

@ -107,26 +107,26 @@ impl X86InlineAsmRegClass {
match self {
Self::reg | Self::reg_abcd => {
if arch == InlineAsmArch::X86_64 {
types! { _: I16, I32, I64, F32, F64; }
types! { _: I16, I32, I64, F16, F32, F64; }
} else {
types! { _: I16, I32, F32; }
types! { _: I16, I32, F16, F32; }
}
}
Self::reg_byte => types! { _: I8; },
Self::xmm_reg => types! {
sse: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
sse: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
},
Self::ymm_reg => types! {
avx: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4);
avx: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4);
},
Self::zmm_reg => types! {
avx512f: I32, I64, F32, F64,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF32(16), VecF64(8);
avx512f: I32, I64, F16, F32, F64, F128,
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2),
VecI8(32), VecI16(16), VecI32(8), VecI64(4), VecF16(16), VecF32(8), VecF64(4),
VecI8(64), VecI16(32), VecI32(16), VecI64(8), VecF16(32), VecF32(16), VecF64(8);
},
Self::kreg => types! {
avx512f: I8, I16;