|
| 1 | +use simd_llvm::simd_cast; |
1 | 2 | use simd_llvm::{simd_shuffle2, simd_shuffle4, simd_shuffle8};
|
2 | 3 | use simd_llvm::{simd_shuffle16, simd_shuffle32};
|
3 | 4 | use v256::*;
|
@@ -514,12 +515,60 @@ pub unsafe fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
|
514 | 515 | a.gt(b)
|
515 | 516 | }
|
516 | 517 |
|
517 |
| -// TODO _mm256_cvtepi16_epi32 |
518 |
| -// TODO _mm256_cvtepi16_epi64 |
519 |
| -// TODO _mm256_cvtepi32_epi64 |
520 |
| -// TODO _mm256_cvtepi8_epi16 |
521 |
| -// TODO _mm256_cvtepi8_epi32 |
522 |
| -// TODO _mm256_cvtepi8_epi64 |
/// Sign-extend 16-bit integers to 32-bit integers.
///
/// Each of the eight `i16` lanes of `a` is sign-extended into the
/// corresponding 32-bit lane of the 256-bit result; compiles to
/// `vpmovsxwd` (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_cvtepi16_epi32(a: i16x8) -> i32x8 {
    // Lane counts match (8 -> 8), so a plain widening cast suffices.
    simd_cast(a)
}
/// Sign-extend 16-bit integers to 64-bit integers.
///
/// Only the four lowest `i16` lanes of `a` (shuffle indices `[0, 1, 2, 3]`)
/// are sign-extended into the four 64-bit lanes of the result; compiles to
/// `vpmovsxwq` (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_cvtepi16_epi64(a: i16x8) -> i64x4 {
    // `simd_cast` requires matching lane counts, so first narrow the
    // 8-lane input to a 4-lane `i16x4` before widening each lane to i64.
    simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
}
/// Sign-extend 32-bit integers to 64-bit integers.
///
/// Each of the four `i32` lanes of `a` is sign-extended into the
/// corresponding 64-bit lane of the result; compiles to `vpmovsxdq`
/// (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_cvtepi32_epi64(a: i32x4) -> i64x4 {
    // Lane counts match (4 -> 4), so a plain widening cast suffices.
    simd_cast(a)
}
/// Sign-extend 8-bit integers to 16-bit integers.
///
/// Each of the sixteen `i8` lanes of `a` is sign-extended into the
/// corresponding 16-bit lane of the 256-bit result; compiles to
/// `vpmovsxbw` (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_cvtepi8_epi16(a: i8x16) -> i16x16 {
    // Lane counts match (16 -> 16), so a plain widening cast suffices.
    simd_cast(a)
}
/// Sign-extend 8-bit integers to 32-bit integers.
///
/// Only the eight lowest `i8` lanes of `a` (shuffle indices `[0..=7]`)
/// are sign-extended into the eight 32-bit lanes of the result; compiles
/// to `vpmovsxbd` (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_cvtepi8_epi32(a: i8x16) -> i32x8 {
    // `simd_cast` requires matching lane counts, so first narrow the
    // 16-lane input to an 8-lane `i8x8` before widening each lane to i32.
    simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
}
// An i8x4 type is pretty useless, but we need it as an intermediate type in
// _mm256_cvtepi8_epi64: `simd_cast` requires equal lane counts, and this
// crate does not define a public 4-lane i8 vector we could use instead.
// `#[repr(simd)]` tells the compiler to treat the struct as a SIMD vector.
#[repr(simd)]
#[allow(non_camel_case_types)]
struct i8x4(i8, i8, i8, i8);
/// Sign-extend 8-bit integers to 64-bit integers.
///
/// Only the four lowest `i8` lanes of `a` (shuffle indices `[0, 1, 2, 3]`)
/// are sign-extended into the four 64-bit lanes of the result; compiles to
/// `vpmovsxbq` (checked by the `assert_instr` attribute in test builds).
#[inline(always)]
#[target_feature = "+avx2"]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_cvtepi8_epi64(a: i8x16) -> i64x4 {
    // Narrow to the private 4-lane `i8x4` defined above so the lane
    // counts match before widening each lane to i64.
    simd_cast::<i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
}
523 | 572 | // TODO _mm256_cvtepu16_epi32
|
524 | 573 | // TODO _mm256_cvtepu16_epi64
|
525 | 574 | // TODO _mm256_cvtepu32_epi64
|
@@ -1940,6 +1989,48 @@ mod tests {
|
1940 | 1989 | 0, 0xFFFFFFFFFFFFFFFFu64 as i64));
|
1941 | 1990 | }
|
1942 | 1991 |
|
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi8_epi16() {
        // All 16 input lanes should survive the widening unchanged,
        // including the negative values (sign extension, not zero extension).
        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
        let r = i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
        assert_eq!(r, avx2::_mm256_cvtepi8_epi16(a));
    }
| 1998 | + |
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi8_epi32() {
        // Only the lowest 8 of the 16 input lanes are converted; negative
        // values check that sign extension (not zero extension) is done.
        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
        let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq!(r, avx2::_mm256_cvtepi8_epi32(a));
    }
| 2005 | + |
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi8_epi64() {
        // Only the lowest 4 of the 16 input lanes are converted; the -1
        // lane checks that sign extension is performed.
        let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
        let r = i64x4::new(0, 0, -1, 1);
        assert_eq!(r, avx2::_mm256_cvtepi8_epi64(a));
    }
| 2012 | + |
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi16_epi32() {
        // All 8 input lanes should survive the widening unchanged,
        // including the negative values (sign extension).
        let a = i16x8::new(0, 0, -1, 1, -2, 2, -3, 3);
        let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq!(r, avx2::_mm256_cvtepi16_epi32(a));
    }
| 2019 | + |
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi16_epi64() {
        // Only the lowest 4 of the 8 input lanes are converted; the -1
        // lane checks that sign extension is performed.
        let a = i16x8::new(0, 0, -1, 1, -2, 2, -3, 3);
        let r = i64x4::new(0, 0, -1, 1);
        assert_eq!(r, avx2::_mm256_cvtepi16_epi64(a));
    }
| 2026 | + |
    #[simd_test = "avx2"]
    unsafe fn _mm256_cvtepi32_epi64() {
        // All 4 input lanes should survive the widening unchanged,
        // including the negative value (sign extension).
        let a = i32x4::new(0, 0, -1, 1);
        let r = i64x4::new(0, 0, -1, 1);
        assert_eq!(r, avx2::_mm256_cvtepi32_epi64(a));
    }
| 2033 | + |
1943 | 2034 | #[simd_test = "avx2"]
|
1944 | 2035 | unsafe fn _mm256_hadd_epi16() {
|
1945 | 2036 | let a = i16x16::splat(2);
|
|
0 commit comments