|
1 |
| -use simd_llvm::{simd_shuffle8, simd_shuffle32}; |
| 1 | +use simd_llvm::{simd_shuffle2, simd_shuffle4, simd_shuffle8}; |
| 2 | +use simd_llvm::{simd_shuffle16, simd_shuffle32}; |
2 | 3 | use v256::*;
|
3 | 4 | use v128::*;
|
4 | 5 | use x86::__m256i;
|
@@ -245,20 +246,130 @@ pub unsafe fn _mm256_blendv_epi8(a:i8x32,b:i8x32,mask:__m256i) -> i8x32 {
|
245 | 246 | pblendvb(a,b,mask)
|
246 | 247 | }
|
247 | 248 |
|
248 |
| -// TODO _mm_broadcastb_epi8 |
249 |
| -// TODO _mm256_broadcastb_epi8 |
250 |
| -// TODO _mm_broadcastd_epi32 |
251 |
| -// TODO _mm256_broadcastd_epi32 |
252 |
| -// TODO _mm_broadcastq_epi64 |
253 |
| -// TODO _mm256_broadcastq_epi64 |
254 |
| -// TODO _mm_broadcastsd_pd |
255 |
| -// TODO _mm256_broadcastsd_pd |
256 |
| -// TODO _mm_broadcastsi128_si256 |
257 |
| -// TODO _mm256_broadcastsi128_si256 |
258 |
| -// TODO _mm_broadcastss_ps |
259 |
| -// TODO _mm256_broadcastss_ps |
260 |
| -// TODO _mm_broadcastw_epi16 |
261 |
| -// TODO _mm256_broadcastw_epi16 |
| 249 | +/// Broadcast the low packed 8-bit integer from `a` to all elements of |
| 250 | +/// the 128-bit returned value. |
| 251 | +#[inline(always)] |
| 252 | +#[target_feature = "+avx2"] |
| 253 | +#[cfg_attr(test, assert_instr(vpbroadcastb))] |
| 254 | +pub unsafe fn _mm_broadcastb_epi8(a: i8x16) -> i8x16 { |
| 255 | + simd_shuffle16(a, i8x16::splat(0i8), [0u32; 16]) |
| 256 | +} |
| 257 | + |
| 258 | +/// Broadcast the low packed 8-bit integer from `a` to all elements of |
| 259 | +/// the 256-bit returned value. |
| 260 | +#[inline(always)] |
| 261 | +#[target_feature = "+avx2"] |
| 262 | +#[cfg_attr(test, assert_instr(vpbroadcastb))] |
| 263 | +pub unsafe fn _mm256_broadcastb_epi8(a: i8x16) -> i8x32 { |
| 264 | + simd_shuffle32(a, i8x16::splat(0i8), [0u32; 32]) |
| 265 | +} |
| 266 | + |
| 267 | +// NB: simd_shuffle4 with integer data types for `a` and `b` is |
| 268 | +// often compiled to vbroadcastss. |
| 269 | +/// Broadcast the low packed 32-bit integer from `a` to all elements of |
| 270 | +/// the 128-bit returned value. |
| 271 | +#[inline(always)] |
| 272 | +#[target_feature = "+avx2"] |
| 273 | +#[cfg_attr(test, assert_instr(vbroadcastss))] |
| 274 | +pub unsafe fn _mm_broadcastd_epi32(a: i32x4) -> i32x4 { |
| 275 | + simd_shuffle4(a, i32x4::splat(0i32), [0u32; 4]) |
| 276 | +} |
| 277 | + |
| 278 | +// NB: simd_shuffle4 with integer data types for `a` and `b` is |
| 279 | +// often compiled to vbroadcastss. |
| 280 | +/// Broadcast the low packed 32-bit integer from `a` to all elements of |
| 281 | +/// the 256-bit returned value. |
| 282 | +#[inline(always)] |
| 283 | +#[target_feature = "+avx2"] |
| 284 | +#[cfg_attr(test, assert_instr(vbroadcastss))] |
| 285 | +pub unsafe fn _mm256_broadcastd_epi32(a: i32x4) -> i32x8 { |
| 286 | + simd_shuffle8(a, i32x4::splat(0i32), [0u32; 8]) |
| 287 | +} |
| 288 | + |
| 289 | +/// Broadcast the low packed 64-bit integer from `a` to all elements of |
| 290 | +/// the 128-bit returned value. |
| 291 | +#[inline(always)] |
| 292 | +#[target_feature = "+avx2"] |
| 293 | +#[cfg_attr(test, assert_instr(vpbroadcastq))] |
| 294 | +pub unsafe fn _mm_broadcastq_epi64(a: i64x2) -> i64x2 { |
| 295 | + simd_shuffle2(a, i64x2::splat(0i64), [0u32; 2]) |
| 296 | +} |
| 297 | + |
| 298 | +// NB: simd_shuffle4 with integer data types for `a` and `b` is |
| 299 | +// often compiled to vbroadcastsd. |
| 300 | +/// Broadcast the low packed 64-bit integer from `a` to all elements of |
| 301 | +/// the 256-bit returned value. |
| 302 | +#[inline(always)] |
| 303 | +#[target_feature = "+avx2"] |
| 304 | +#[cfg_attr(test, assert_instr(vbroadcastsd))] |
| 305 | +pub unsafe fn _mm256_broadcastq_epi64(a: i64x2) -> i64x4 { |
| 306 | + simd_shuffle4(a, i64x2::splat(0i64), [0u32; 4]) |
| 307 | +} |
| 308 | + |
| 309 | +/// Broadcast the low double-precision (64-bit) floating-point element |
| 310 | +/// from `a` to all elements of the 128-bit returned value. |
| 311 | +#[inline(always)] |
| 312 | +#[target_feature = "+avx2"] |
| 313 | +#[cfg_attr(test, assert_instr(vmovddup))] |
| 314 | +pub unsafe fn _mm_broadcastsd_pd(a: f64x2) -> f64x2 { |
| 315 | + simd_shuffle2(a, f64x2::splat(0f64), [0u32; 2]) |
| 316 | +} |
| 317 | + |
| 318 | +/// Broadcast the low double-precision (64-bit) floating-point element |
| 319 | +/// from `a` to all elements of the 256-bit returned value. |
| 320 | +#[inline(always)] |
| 321 | +#[target_feature = "+avx2"] |
| 322 | +#[cfg_attr(test, assert_instr(vbroadcastsd))] |
| 323 | +pub unsafe fn _mm256_broadcastsd_pd(a: f64x2) -> f64x4 { |
| 324 | + simd_shuffle4(a, f64x2::splat(0f64), [0u32; 4]) |
| 325 | +} |
| 326 | + |
| 327 | +// NB: broadcastsi128_si256 is often compiled to vinsertf128 or |
| 328 | +// vbroadcastf128. |
| 329 | +/// Broadcast 128 bits of integer data from a to all 128-bit lanes in |
| 330 | +/// the 256-bit returned value. |
| 331 | +#[inline(always)] |
| 332 | +#[target_feature = "+avx2"] |
| 333 | +pub unsafe fn _mm256_broadcastsi128_si256(a: i64x2) -> i64x4 { |
| 334 | + simd_shuffle4(a, i64x2::splat(0i64), [0, 1, 0, 1]) |
| 335 | +} |
| 336 | + |
| 337 | +/// Broadcast the low single-precision (32-bit) floating-point element |
| 338 | +/// from `a` to all elements of the 128-bit returned value. |
| 339 | +#[inline(always)] |
| 340 | +#[target_feature = "+avx2"] |
| 341 | +#[cfg_attr(test, assert_instr(vbroadcastss))] |
| 342 | +pub unsafe fn _mm_broadcastss_ps(a: f32x4) -> f32x4 { |
| 343 | + simd_shuffle4(a, f32x4::splat(0f32), [0u32; 4]) |
| 344 | +} |
| 345 | + |
| 346 | +/// Broadcast the low single-precision (32-bit) floating-point element |
| 347 | +/// from `a` to all elements of the 256-bit returned value. |
| 348 | +#[inline(always)] |
| 349 | +#[target_feature = "+avx2"] |
| 350 | +#[cfg_attr(test, assert_instr(vbroadcastss))] |
| 351 | +pub unsafe fn _mm256_broadcastss_ps(a: f32x4) -> f32x8 { |
| 352 | + simd_shuffle8(a, f32x4::splat(0f32), [0u32; 8]) |
| 353 | +} |
| 354 | + |
| 355 | +/// Broadcast the low packed 16-bit integer from a to all elements of |
| 356 | +/// the 128-bit returned value |
| 357 | +#[inline(always)] |
| 358 | +#[target_feature = "+avx2"] |
| 359 | +#[cfg_attr(test, assert_instr(vpbroadcastw))] |
| 360 | +pub unsafe fn _mm_broadcastw_epi16(a: i16x8) -> i16x8 { |
| 361 | + simd_shuffle8(a, i16x8::splat(0i16), [0u32; 8]) |
| 362 | +} |
| 363 | + |
| 364 | +/// Broadcast the low packed 16-bit integer from a to all elements of |
| 365 | +/// the 256-bit returned value |
| 366 | +#[inline(always)] |
| 367 | +#[target_feature = "+avx2"] |
| 368 | +#[cfg_attr(test, assert_instr(vpbroadcastw))] |
| 369 | +pub unsafe fn _mm256_broadcastw_epi16(a: i16x8) -> i16x16 { |
| 370 | + simd_shuffle16(a, i16x8::splat(0i16), [0u32; 16]) |
| 371 | +} |
| 372 | + |
262 | 373 | // TODO _mm256_bslli_epi128
|
263 | 374 | // TODO _mm256_bsrli_epi128
|
264 | 375 |
|
@@ -1517,6 +1628,99 @@ mod tests {
|
1517 | 1628 | assert_eq!(r,e);
|
1518 | 1629 | }
|
1519 | 1630 |
|
| 1631 | + #[simd_test = "avx2"] |
| 1632 | + unsafe fn _mm_broadcastb_epi8() { |
| 1633 | + let a = i8x16::splat(0x00).replace(0, 0x2a); |
| 1634 | + let res = avx2::_mm_broadcastb_epi8(a); |
| 1635 | + assert_eq!(res, i8x16::splat(0x2a)); |
| 1636 | + } |
| 1637 | + |
| 1638 | + #[simd_test = "avx2"] |
| 1639 | + unsafe fn _mm256_broadcastb_epi8() { |
| 1640 | + let a = i8x16::splat(0x00).replace(0, 0x2a); |
| 1641 | + let res = avx2::_mm256_broadcastb_epi8(a); |
| 1642 | + assert_eq!(res, i8x32::splat(0x2a)); |
| 1643 | + } |
| 1644 | + |
| 1645 | + #[simd_test = "avx2"] |
| 1646 | + unsafe fn _mm_broadcastd_epi32() { |
| 1647 | + let a = i32x4::splat(0x00).replace(0, 0x2a).replace(1, 0x8000000); |
| 1648 | + let res = avx2::_mm_broadcastd_epi32(a); |
| 1649 | + assert_eq!(res, i32x4::splat(0x2a)); |
| 1650 | + } |
| 1651 | + |
| 1652 | + #[simd_test = "avx2"] |
| 1653 | + unsafe fn _mm256_broadcastd_epi32() { |
| 1654 | + let a = i32x4::splat(0x00).replace(0, 0x2a).replace(1, 0x8000000); |
| 1655 | + let res = avx2::_mm256_broadcastd_epi32(a); |
| 1656 | + assert_eq!(res, i32x8::splat(0x2a)); |
| 1657 | + } |
| 1658 | + |
| 1659 | + #[simd_test = "avx2"] |
| 1660 | + unsafe fn _mm_broadcastq_epi64() { |
| 1661 | + let a = i64x2::splat(0x00).replace(0, 0x1ffffffff); |
| 1662 | + let res = avx2::_mm_broadcastq_epi64(a); |
| 1663 | + assert_eq!(res, i64x2::splat(0x1ffffffff)); |
| 1664 | + } |
| 1665 | + |
| 1666 | + #[simd_test = "avx2"] |
| 1667 | + unsafe fn _mm256_broadcastq_epi64() { |
| 1668 | + let a = i64x2::splat(0x00).replace(0, 0x1ffffffff); |
| 1669 | + let res = avx2::_mm256_broadcastq_epi64(a); |
| 1670 | + assert_eq!(res, i64x4::splat(0x1ffffffff)); |
| 1671 | + } |
| 1672 | + |
| 1673 | + #[simd_test = "avx2"] |
| 1674 | + unsafe fn _mm_broadcastsd_pd() { |
| 1675 | + let a = f64x2::splat(3.14f64).replace(0, 6.28f64); |
| 1676 | + let res = avx2::_mm_broadcastsd_pd(a); |
| 1677 | + assert_eq!(res, f64x2::splat(6.28f64)); |
| 1678 | + } |
| 1679 | + |
| 1680 | + #[simd_test = "avx2"] |
| 1681 | + unsafe fn _mm256_broadcastsd_pd() { |
| 1682 | + let a = f64x2::splat(3.14f64).replace(0, 6.28f64); |
| 1683 | + let res = avx2::_mm256_broadcastsd_pd(a); |
| 1684 | + assert_eq!(res, f64x4::splat(6.28f64)); |
| 1685 | + } |
| 1686 | + |
| 1687 | + #[simd_test = "avx2"] |
| 1688 | + unsafe fn _mm256_broadcastsi128_si256() { |
| 1689 | + let a = i64x2::new(0x0987654321012334, 0x5678909876543210); |
| 1690 | + let res = avx2::_mm256_broadcastsi128_si256(a); |
| 1691 | + let retval = i64x4::new(0x0987654321012334, 0x5678909876543210, |
| 1692 | + 0x0987654321012334, 0x5678909876543210); |
| 1693 | + assert_eq!(res, retval); |
| 1694 | + } |
| 1695 | + |
| 1696 | + #[simd_test = "avx2"] |
| 1697 | + unsafe fn _mm_broadcastss_ps() { |
| 1698 | + let a = f32x4::splat(3.14f32).replace(0, 6.28f32); |
| 1699 | + let res = avx2::_mm_broadcastss_ps(a); |
| 1700 | + assert_eq!(res, f32x4::splat(6.28f32)); |
| 1701 | + } |
| 1702 | + |
| 1703 | + #[simd_test = "avx2"] |
| 1704 | + unsafe fn _mm256_broadcastss_ps() { |
| 1705 | + let a = f32x4::splat(3.14f32).replace(0, 6.28f32); |
| 1706 | + let res = avx2::_mm256_broadcastss_ps(a); |
| 1707 | + assert_eq!(res, f32x8::splat(6.28f32)); |
| 1708 | + } |
| 1709 | + |
| 1710 | + #[simd_test = "avx2"] |
| 1711 | + unsafe fn _mm_broadcastw_epi16() { |
| 1712 | + let a = i16x8::splat(0x2a).replace(0, 0x22b); |
| 1713 | + let res = avx2::_mm_broadcastw_epi16(a); |
| 1714 | + assert_eq!(res, i16x8::splat(0x22b)); |
| 1715 | + } |
| 1716 | + |
| 1717 | + #[simd_test = "avx2"] |
| 1718 | + unsafe fn _mm256_broadcastw_epi16() { |
| 1719 | + let a = i16x8::splat(0x2a).replace(0, 0x22b); |
| 1720 | + let res = avx2::_mm256_broadcastw_epi16(a); |
| 1721 | + assert_eq!(res, i16x16::splat(0x22b)); |
| 1722 | + } |
| 1723 | + |
1520 | 1724 | #[simd_test = "avx2"]
|
1521 | 1725 | unsafe fn _mm256_cmpeq_epi8() {
|
1522 | 1726 | let a = i8x32::new(
|
|
0 commit comments