Skip to content

Commit b2fae58

Browse files
sayantn authored and Amanieu committed
Fix errors in incorrect SAE and ROUNDING parameters
1 parent f1c1839 commit b2fae58

File tree

3 files changed

+123
-78
lines changed

3 files changed

+123
-78
lines changed

crates/core_arch/src/x86/avx512f.rs

+93-33
Original file line numberDiff line numberDiff line change
@@ -15046,60 +15046,90 @@ pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m5
1504615046
}
1504715047

1504815048
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15049+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15050+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15051+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15052+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15053+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15054+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15055+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15056+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15057+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15058+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15059+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1505015060
///
1505115061
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
1505215062
#[inline]
1505315063
#[target_feature(enable = "avx512f")]
1505415064
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15055-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15065+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1505615066
#[rustc_legacy_const_generics(1)]
15057-
pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
15067+
pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
1505815068
unsafe {
15059-
static_assert_sae!(SAE);
15069+
static_assert_extended_rounding!(ROUNDING);
1506015070
let a = a.as_f32x16();
15061-
let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15071+
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
1506215072
transmute(r)
1506315073
}
1506415074
}
1506515075

1506615076
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15067-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15077+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15078+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15079+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15080+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15081+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15082+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15083+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15084+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15085+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15086+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15087+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1506815088
///
1506915089
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
1507015090
#[inline]
1507115091
#[target_feature(enable = "avx512f")]
1507215092
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15073-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15093+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1507415094
#[rustc_legacy_const_generics(3)]
15075-
pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
15095+
pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
1507615096
src: __m256i,
1507715097
k: __mmask16,
1507815098
a: __m512,
1507915099
) -> __m256i {
1508015100
unsafe {
15081-
static_assert_sae!(SAE);
15101+
static_assert_extended_rounding!(ROUNDING);
1508215102
let a = a.as_f32x16();
1508315103
let src = src.as_i16x16();
15084-
let r = vcvtps2ph(a, SAE, src, k);
15104+
let r = vcvtps2ph(a, ROUNDING, src, k);
1508515105
transmute(r)
1508615106
}
1508715107
}
1508815108

1508915109
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15090-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15110+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15111+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15112+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15113+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15114+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15115+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15116+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15117+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15118+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15119+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15120+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1509115121
///
1509215122
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
1509315123
#[inline]
1509415124
#[target_feature(enable = "avx512f")]
1509515125
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15096-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15126+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1509715127
#[rustc_legacy_const_generics(2)]
15098-
pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15128+
pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
1509915129
unsafe {
15100-
static_assert_sae!(SAE);
15130+
static_assert_extended_rounding!(ROUNDING);
1510115131
let a = a.as_f32x16();
15102-
let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15132+
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
1510315133
transmute(r)
1510415134
}
1510515135
}
@@ -15203,56 +15233,86 @@ pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m1
1520315233
}
1520415234

1520515235
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15206-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15236+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15237+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15238+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15239+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15240+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15241+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15242+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15243+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15244+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15245+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15246+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1520715247
///
1520815248
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
1520915249
#[inline]
1521015250
#[target_feature(enable = "avx512f")]
1521115251
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15212-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15252+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1521315253
#[rustc_legacy_const_generics(1)]
15214-
pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
15254+
pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
1521515255
unsafe {
15216-
static_assert_sae!(SAE);
15256+
static_assert_extended_rounding!(ROUNDING);
1521715257
let a = a.as_f32x16();
15218-
let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
15258+
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
1521915259
transmute(r)
1522015260
}
1522115261
}
1522215262

1522315263
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15224-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15264+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15265+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15266+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15267+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15268+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15269+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15270+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15271+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15272+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15273+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15274+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1522515275
///
1522615276
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
1522715277
#[inline]
1522815278
#[target_feature(enable = "avx512f")]
1522915279
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15230-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15280+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1523115281
#[rustc_legacy_const_generics(3)]
15232-
pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15282+
pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
1523315283
unsafe {
15234-
static_assert_sae!(SAE);
15284+
static_assert_extended_rounding!(ROUNDING);
1523515285
let a = a.as_f32x16();
1523615286
let src = src.as_i16x16();
15237-
let r = vcvtps2ph(a, SAE, src, k);
15287+
let r = vcvtps2ph(a, ROUNDING, src, k);
1523815288
transmute(r)
1523915289
}
1524015290
}
1524115291

1524215292
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15243-
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15293+
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15294+
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15295+
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15296+
/// * [`_MM_FROUND_TO_POS_INF`] // round up
15297+
/// * [`_MM_FROUND_TO_ZERO`] // truncate
15298+
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15299+
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15300+
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15301+
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15302+
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15303+
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
1524415304
///
1524515305
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
1524615306
#[inline]
1524715307
#[target_feature(enable = "avx512f")]
1524815308
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15249-
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15309+
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
1525015310
#[rustc_legacy_const_generics(2)]
15251-
pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15311+
pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
1525215312
unsafe {
15253-
static_assert_sae!(SAE);
15313+
static_assert_extended_rounding!(ROUNDING);
1525415314
let a = a.as_f32x16();
15255-
let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
15315+
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
1525615316
transmute(r)
1525715317
}
1525815318
}
@@ -42487,11 +42547,11 @@ unsafe extern "C" {
4248742547
fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
4248842548

4248942549
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
42490-
fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
42550+
fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
4249142551
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
42492-
fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
42552+
fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
4249342553
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
42494-
fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
42554+
fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
4249542555

4249642556
#[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
4249742557
fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

0 commit comments

Comments
 (0)