Update f16c intrinsics to use the f16c target feature

gnzlbg · gnzlbg · commit fe30e8aea013 · 2019-04-24T16:25:26.000+02:00
diff --git a/crates/core_arch/src/x86/f16c.rs b/crates/core_arch/src/x86/f16c.rs
@@ -27,7 +27,7 @@ extern "unadjusted" {
 /// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
 /// vector.
 #[inline]
-#[target_feature(enable = "avx512f")]
+#[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
     transmute(llvm_vcvtph2ps_128(transmute(a)))
@@ -36,7 +36,7 @@ pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
 /// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
 /// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
 #[inline]
-#[target_feature(enable = "avx512f")]
+#[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
     transmute(llvm_vcvtph2ps_256(transmute(a)))
@@ -70,7 +70,7 @@ macro_rules! dispatch_rounding {
 /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
 /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
 #[inline]
-#[target_feature(enable = "avx512f")]
+#[target_feature(enable = "f16c")]
 #[rustc_args_required_const(1)]
 #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
 pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
@@ -94,7 +94,7 @@ pub unsafe fn _mm_cvtps_ph(a: __m128, imm_rounding: i32) -> __m128i {
 /// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions,
 /// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`].
 #[inline]
-#[target_feature(enable = "avx512f")]
+#[target_feature(enable = "f16c")]
 #[rustc_args_required_const(1)]
 #[cfg_attr(test, assert_instr("vcvtps2ph", imm_rounding = 0))]
 pub unsafe fn _mm256_cvtps_ph(a: __m256, imm_rounding: i32) -> __m128i {
@@ -112,7 +112,7 @@ mod tests {
     use crate::{core_arch::x86::*, mem::transmute};
     use stdsimd_test::simd_test;
 
-    #[simd_test(enable = "avx512f")]
+    #[simd_test(enable = "f16c")]
     unsafe fn test_mm_cvtph_ps() {
         let array = [1_f32, 2_f32, 3_f32, 4_f32];
         let float_vec: __m128 = transmute(array);
@@ -122,7 +122,7 @@ mod tests {
         assert_eq!(result, array);
     }
 
-    #[simd_test(enable = "avx512f")]
+    #[simd_test(enable = "f16c")]
     unsafe fn test_mm256_cvtph_ps() {
         let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
         let float_vec: __m256 = transmute(array);
diff --git a/crates/std_detect/src/detect/arch/x86.rs b/crates/std_detect/src/detect/arch/x86.rs
@@ -179,7 +179,7 @@ macro_rules! is_x86_feature_detected {
             $crate::detect::Feature::avx512_vpopcntdq)
     };
     ("f16c") => {
-        cfg!(target_feature = "avx512f") || $crate::detect::check_for(
+        cfg!(target_feature = "f16c") || $crate::detect::check_for(
             $crate::detect::Feature::f16c)
     };
     ("fma") => {
diff --git a/crates/stdsimd-verify/tests/x86-intel.rs b/crates/stdsimd-verify/tests/x86-intel.rs
@@ -299,10 +299,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
             // it "avx512ifma".
             "avx512ifma52" => String::from("avx512ifma"),
             // See: https://github.com/rust-lang-nursery/stdsimd/issues/738
-            // FIXME: we need to fix "fp16c" to "f16c" here. Since
-            // https://github.com/rust-lang/rust/pull/60191 is not merged,
-            // we temporarily map it to "avx512f".
-            "fp16c" => String::from("avx512f"),
+            // Intel's intrinsics guide spells this feature `fp16c`, whereas
+            // Intel's architecture manuals (and rustc) name it `f16c`.
+            "fp16c" => String::from("f16c"),
             _ => cpuid,
         };
         let fixed_cpuid = fixup_cpuid(cpuid);