Skip to content

Commit fe30e8a

Browse files
committed
Update f16c intrinsics to use the f16c target feature
1 parent aacbae3 commit fe30e8a

File tree

3 files changed

+10
-11
lines changed

3 files changed

+10
-11
lines changed

crates/core_arch/src/x86/f16c.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ extern "unadjusted" {
2727
/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
2828
/// vector.
2929
#[inline]
30-
#[target_feature(enable = "avx512f")]
30+
#[target_feature(enable = "f16c")]
3131
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
3232
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
3333
transmute(llvm_vcvtph2ps_128(transmute(a)))
@@ -36,7 +36,7 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
3636
/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
3737
/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
3838
#[inline]
39-
#[target_feature(enable = "avx512f")]
39+
#[target_feature(enable = "f16c")]
4040
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
4141
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
4242
transmute(llvm_vcvtph2ps_256(transmute(a)))
@@ -70,7 +70,7 @@ macro_rules! dispatch_rounding {
7070
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
7171
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
7272
#[inline]
73-
#[target_feature(enable = "avx512f")]
73+
#[target_feature(enable = "f16c")]
7474
#[rustc_args_required_const(1)]
7575
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
7676
pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
@@ -94,7 +94,7 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
9494
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
9595
/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
9696
#[inline]
97-
#[target_feature(enable = "avx512f")]
97+
#[target_feature(enable = "f16c")]
9898
#[rustc_args_required_const(1)]
9999
#[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
100100
pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
@@ -112,7 +112,7 @@ mod tests {
112112
use crate::{core_arch::x86::*, mem::transmute};
113113
use stdsimd_test::simd_test;
114114

115-
#[simd_test(enable = "avx512f")]
115+
#[simd_test(enable = "f16c")]
116116
unsafe fn test_mm_cvtph_ps() {
117117
let array = [1_f32, 2_f32, 3_f32, 4_f32];
118118
let float_vec: __m128 = transmute(array);
@@ -122,7 +122,7 @@ mod tests {
122122
assert_eq!(result, array);
123123
}
124124

125-
#[simd_test(enable = "avx512f")]
125+
#[simd_test(enable = "f16c")]
126126
unsafe fn test_mm256_cvtph_ps() {
127127
let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
128128
let float_vec: __m256 = transmute(array);

crates/std_detect/src/detect/arch/x86.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ macro_rules! is_x86_feature_detected {
179179
$crate::detect::Feature::avx512_vpopcntdq)
180180
};
181181
("f16c") => {
182-
cfg!(target_feature = "avx512f") || $crate::detect::check_for(
182+
cfg!(target_feature = "f16c") || $crate::detect::check_for(
183183
$crate::detect::Feature::f16c)
184184
};
185185
("fma") => {

crates/stdsimd-verify/tests/x86-intel.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
299299
// it "avx512ifma".
300300
"avx512ifma52" => String::from("avx512ifma"),
301301
// See: https://github.com/rust-lang-nursery/stdsimd/issues/738
302-
// FIXME: we need to fix "fp16c" to "f16c" here. Since
303-
// https://github.com/rust-lang/rust/pull/60191 is not merged,
304-
// we temporarily map it to "avx512f".
305-
"fp16c" => String::from("avx512f"),
302+
// Intel's intrinsics guide names this feature `fp16c`, whereas Intel's
303+
// architecture manuals call it `f16c`; map the guide's name to `f16c`.
304+
"fp16c" => String::from("f16c"),
306305
_ => cpuid,
307306
};
308307
let fixed_cpuid = fixup_cpuid(cpuid);

0 commit comments

Comments
 (0)