Skip to content

Commit b29ce12

Browse files
jneem authored and alexcrichton committed
avx2: signed extensions (rust-lang#132)
_mm256_cvtepi8_epi16 _mm256_cvtepi8_epi32 _mm256_cvtepi8_epi64 _mm256_cvtepi16_epi32 _mm256_cvtepi16_epi64 _mm256_cvtepi32_epi64
1 parent f36ccde commit b29ce12

File tree

1 file changed

+97
-6
lines changed

1 file changed

+97
-6
lines changed

src/x86/avx2.rs

+97-6
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
use simd_llvm::simd_cast;
12
use simd_llvm::{simd_shuffle2, simd_shuffle4, simd_shuffle8};
23
use simd_llvm::{simd_shuffle16, simd_shuffle32};
34
use v256::*;
@@ -514,12 +515,60 @@ pub unsafe fn _mm256_cmpgt_epi8(a: i8x32, b: i8x32) -> i8x32 {
514515
a.gt(b)
515516
}
516517

517-
// TODO _mm256_cvtepi16_epi32
518-
// TODO _mm256_cvtepi16_epi64
519-
// TODO _mm256_cvtepi32_epi64
520-
// TODO _mm256_cvtepi8_epi16
521-
// TODO _mm256_cvtepi8_epi32
522-
// TODO _mm256_cvtepi8_epi64
518+
/// Sign-extend 16-bit integers to 32-bit integers.
///
/// Takes all eight `i16` lanes of `a` and returns them as the eight `i32`
/// lanes of the result, each lane keeping its signed value (e.g. `-1`
/// stays `-1`).
///
/// In test builds the function carries `assert_instr(vpmovsxwd)`.
519+
#[inline(always)]
520+
#[target_feature = "+avx2"]
521+
#[cfg_attr(test, assert_instr(vpmovsxwd))]
522+
pub unsafe fn _mm256_cvtepi16_epi32(a: i16x8) -> i32x8 {
523+
// Input and output have the same number of lanes (8), so the whole
// vector is cast lane-by-lane with no lane selection needed.
simd_cast(a)
524+
}
525+
526+
/// Sign-extend 16-bit integers to 64-bit integers.
///
/// Only the first four lanes of `a` (indices 0 through 3) are converted;
/// lanes 4 through 7 do not contribute to the result. Each selected lane
/// keeps its signed value in the returned `i64x4`.
///
/// In test builds the function carries `assert_instr(vpmovsxwq)`.
527+
#[inline(always)]
528+
#[target_feature = "+avx2"]
529+
#[cfg_attr(test, assert_instr(vpmovsxwq))]
530+
pub unsafe fn _mm256_cvtepi16_epi64(a: i16x8) -> i64x4 {
531+
// The result only has four lanes, so first narrow `a` to an `i16x4`
// holding lanes 0..=3 (both shuffle operands are `a`, so indices 0..=3
// pick from `a` itself), then cast that four-lane vector to `i64x4`.
simd_cast::<::v64::i16x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
532+
}
533+
534+
/// Sign-extend 32-bit integers to 64-bit integers.
///
/// Takes all four `i32` lanes of `a` and returns them as the four `i64`
/// lanes of the result, each lane keeping its signed value.
///
/// In test builds the function carries `assert_instr(vpmovsxdq)`.
535+
#[inline(always)]
536+
#[target_feature = "+avx2"]
537+
#[cfg_attr(test, assert_instr(vpmovsxdq))]
538+
pub unsafe fn _mm256_cvtepi32_epi64(a: i32x4) -> i64x4 {
539+
// Four lanes in, four lanes out: a direct lane-wise cast is enough.
simd_cast(a)
540+
}
541+
542+
/// Sign-extend 8-bit integers to 16-bit integers.
///
/// Takes all sixteen `i8` lanes of `a` and returns them as the sixteen
/// `i16` lanes of the result, each lane keeping its signed value.
///
/// In test builds the function carries `assert_instr(vpmovsxbw)`.
543+
#[inline(always)]
544+
#[target_feature = "+avx2"]
545+
#[cfg_attr(test, assert_instr(vpmovsxbw))]
546+
pub unsafe fn _mm256_cvtepi8_epi16(a: i8x16) -> i16x16 {
547+
// Sixteen lanes in, sixteen lanes out: a direct lane-wise cast is enough.
simd_cast(a)
548+
}
549+
550+
/// Sign-extend 8-bit integers to 32-bit integers.
///
/// Only the first eight lanes of `a` (indices 0 through 7) are converted;
/// lanes 8 through 15 do not contribute to the result. Each selected lane
/// keeps its signed value in the returned `i32x8`.
///
/// In test builds the function carries `assert_instr(vpmovsxbd)`.
551+
#[inline(always)]
552+
#[target_feature = "+avx2"]
553+
#[cfg_attr(test, assert_instr(vpmovsxbd))]
554+
pub unsafe fn _mm256_cvtepi8_epi32(a: i8x16) -> i32x8 {
555+
// The result only has eight lanes, so first narrow `a` to an `i8x8`
// holding lanes 0..=7, then cast that eight-lane vector to `i32x8`.
simd_cast::<::v64::i8x8, _>(simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]))
556+
}
557+
558+
// An i8x4 type is pretty useless, but we need it as an intermediate type in
559+
// _mm256_cvtepi8_epi64.
// It is declared without `pub`, so it is not part of this module's public
// interface. The lowercase name mirrors the other vector type names used
// in this file (`i8x16`, `i64x4`, ...), hence the `non_camel_case_types`
// allowance below.
560+
#[repr(simd)]
561+
#[allow(non_camel_case_types)]
562+
struct i8x4(i8, i8, i8, i8);
563+
564+
/// Sign-extend 8-bit integers to 64-bit integers.
///
/// Only the first four lanes of `a` (indices 0 through 3) are converted;
/// lanes 4 through 15 do not contribute to the result. Each selected lane
/// keeps its signed value in the returned `i64x4`.
///
/// In test builds the function carries `assert_instr(vpmovsxbq)`.
565+
#[inline(always)]
566+
#[target_feature = "+avx2"]
567+
#[cfg_attr(test, assert_instr(vpmovsxbq))]
568+
pub unsafe fn _mm256_cvtepi8_epi64(a: i8x16) -> i64x4 {
569+
// The result only has four lanes, so first narrow `a` to the local
// four-lane `i8x4` helper type (lanes 0..=3), then cast it to `i64x4`.
simd_cast::<i8x4, _>(simd_shuffle4(a, a, [0, 1, 2, 3]))
570+
}
571+
523572
// TODO _mm256_cvtepu16_epi32
524573
// TODO _mm256_cvtepu16_epi64
525574
// TODO _mm256_cvtepu32_epi64
@@ -1940,6 +1989,48 @@ mod tests {
19401989
0, 0xFFFFFFFFFFFFFFFFu64 as i64));
19411990
}
19421991

1992+
// Widening i8 -> i16 must keep every one of the sixteen lanes' values,
// including the negative ones.
#[simd_test = "avx2"]
1993+
unsafe fn _mm256_cvtepi8_epi16() {
1994+
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
1995+
let r = i16x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
1996+
assert_eq!(r, avx2::_mm256_cvtepi8_epi16(a));
1997+
}
1998+
1999+
// Widening i8 -> i32 must produce the first eight input lanes unchanged in
// value; the remaining eight input lanes are not expected in the output.
#[simd_test = "avx2"]
2000+
unsafe fn _mm256_cvtepi8_epi32() {
2001+
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
2002+
let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
2003+
assert_eq!(r, avx2::_mm256_cvtepi8_epi32(a));
2004+
}
2005+
2006+
// Widening i8 -> i64 must produce the first four input lanes unchanged in
// value; the remaining twelve input lanes are not expected in the output.
#[simd_test = "avx2"]
2007+
unsafe fn _mm256_cvtepi8_epi64() {
2008+
let a = i8x16::new(0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7);
2009+
let r = i64x4::new(0, 0, -1, 1);
2010+
assert_eq!(r, avx2::_mm256_cvtepi8_epi64(a));
2011+
}
2012+
2013+
// Widening i16 -> i32 must keep every one of the eight lanes' values,
// including the negative ones.
#[simd_test = "avx2"]
2014+
unsafe fn _mm256_cvtepi16_epi32() {
2015+
let a = i16x8::new(0, 0, -1, 1, -2, 2, -3, 3);
2016+
let r = i32x8::new(0, 0, -1, 1, -2, 2, -3, 3);
2017+
assert_eq!(r, avx2::_mm256_cvtepi16_epi32(a));
2018+
}
2019+
2020+
// Widening i16 -> i64 must produce the first four input lanes unchanged in
// value; the remaining four input lanes are not expected in the output.
#[simd_test = "avx2"]
2021+
unsafe fn _mm256_cvtepi16_epi64() {
2022+
let a = i16x8::new(0, 0, -1, 1, -2, 2, -3, 3);
2023+
let r = i64x4::new(0, 0, -1, 1);
2024+
assert_eq!(r, avx2::_mm256_cvtepi16_epi64(a));
2025+
}
2026+
2027+
// Widening i32 -> i64 must keep every one of the four lanes' values,
// including the negative one.
#[simd_test = "avx2"]
2028+
unsafe fn _mm256_cvtepi32_epi64() {
2029+
let a = i32x4::new(0, 0, -1, 1);
2030+
let r = i64x4::new(0, 0, -1, 1);
2031+
assert_eq!(r, avx2::_mm256_cvtepi32_epi64(a));
2032+
}
2033+
19432034
#[simd_test = "avx2"]
19442035
unsafe fn _mm256_hadd_epi16() {
19452036
let a = i16x16::splat(2);

0 commit comments

Comments
 (0)