Skip to content

Commit 0c08a8b

Browse files
authored
Add neon instruction vdup (#1071)
1 parent cee53b6 commit 0c08a8b

File tree

1 file changed

+298
-0
lines changed
  • crates/core_arch/src/arm/neon

1 file changed

+298
-0
lines changed

crates/core_arch/src/arm/neon/mod.rs

+298
Original file line numberDiff line numberDiff line change
@@ -3749,6 +3749,26 @@ pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t {
37493749
)
37503750
}
37513751

3752+
/// Duplicate vector element to vector or scalar.
///
/// Returns an `int16x8_t` with all eight elements set to `value`.
3753+
#[inline]
3754+
#[target_feature(enable = "neon")]
3755+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3756+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3757+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3758+
pub unsafe fn vdupq_n_s16(value: i16) -> int16x8_t {
3759+
int16x8_t(value, value, value, value, value, value, value, value)
3760+
}
3761+
3762+
/// Duplicate vector element to vector or scalar.
///
/// Returns an `int32x4_t` with all four elements set to `value`.
3763+
#[inline]
3764+
#[target_feature(enable = "neon")]
3765+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3766+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3767+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3768+
pub unsafe fn vdupq_n_s32(value: i32) -> int32x4_t {
3769+
int32x4_t(value, value, value, value)
3770+
}
3771+
37523772
/// Duplicate vector element to vector or scalar
37533773
#[inline]
37543774
#[target_feature(enable = "neon")]
@@ -3762,6 +3782,154 @@ pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t {
37623782
)
37633783
}
37643784

3785+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `uint16x8_t` with all eight elements set to `value`.
3786+
#[inline]
3787+
#[target_feature(enable = "neon")]
3788+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3789+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3790+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3791+
pub unsafe fn vdupq_n_u16(value: u16) -> uint16x8_t {
3792+
uint16x8_t(value, value, value, value, value, value, value, value)
3793+
}
3794+
3795+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `uint32x4_t` with all four elements set to `value`.
3796+
#[inline]
3797+
#[target_feature(enable = "neon")]
3798+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3799+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3800+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3801+
pub unsafe fn vdupq_n_u32(value: u32) -> uint32x4_t {
3802+
uint32x4_t(value, value, value, value)
3803+
}
3804+
3805+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `poly8x16_t` with all sixteen elements set to `value`.
3806+
#[inline]
3807+
#[target_feature(enable = "neon")]
3808+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3809+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
3810+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3811+
pub unsafe fn vdupq_n_p8(value: p8) -> poly8x16_t {
3812+
poly8x16_t(
3813+
value, value, value, value, value, value, value, value, value, value, value, value, value,
3814+
value, value, value,
3815+
)
3816+
}
3817+
3818+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `poly16x8_t` with all eight elements set to `value`.
3819+
#[inline]
3820+
#[target_feature(enable = "neon")]
3821+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3822+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3823+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3824+
pub unsafe fn vdupq_n_p16(value: p16) -> poly16x8_t {
3825+
poly16x8_t(value, value, value, value, value, value, value, value)
3826+
}
3827+
3828+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `float32x4_t` with all four elements set to `value`.
3829+
#[inline]
3830+
#[target_feature(enable = "neon")]
3831+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3832+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3833+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3834+
pub unsafe fn vdupq_n_f32(value: f32) -> float32x4_t {
3835+
float32x4_t(value, value, value, value)
3836+
}
3837+
3838+
/// Duplicate vector element to vector or scalar.
3839+
/// This instruction duplicates the vector element at the specified element index
3840+
/// in the source SIMD&FP register into a scalar or each element in a vector,
3841+
/// and writes the result to the destination SIMD&FP register.
3842+
3843+
///
/// Returns an `int8x8_t` with all eight elements set to `value`.
///
/// NOTE(review): the paragraph above mentions an element index in a source register, but this
/// function takes only a scalar `value`; it reads like a header for the whole `vdup_n_*` group
/// that ended up attached to this item. Confirm whether it belongs in this function's docs.
3844+
#[inline]
3845+
#[target_feature(enable = "neon")]
3846+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3847+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
3848+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3849+
pub unsafe fn vdup_n_s8(value: i8) -> int8x8_t {
3850+
int8x8_t(value, value, value, value, value, value, value, value)
3851+
}
3852+
3853+
/// Duplicate vector element to vector or scalar.
///
/// Returns an `int16x4_t` with all four elements set to `value`.
3854+
#[inline]
3855+
#[target_feature(enable = "neon")]
3856+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3857+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3858+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3859+
pub unsafe fn vdup_n_s16(value: i16) -> int16x4_t {
3860+
int16x4_t(value, value, value, value)
3861+
}
3862+
3863+
/// Duplicate vector element to vector or scalar.
///
/// Returns an `int32x2_t` with both elements set to `value`.
3864+
#[inline]
3865+
#[target_feature(enable = "neon")]
3866+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3867+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3868+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3869+
pub unsafe fn vdup_n_s32(value: i32) -> int32x2_t {
3870+
int32x2_t(value, value)
3871+
}
3872+
3873+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `uint8x8_t` with all eight elements set to `value`.
3874+
#[inline]
3875+
#[target_feature(enable = "neon")]
3876+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3877+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
3878+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3879+
pub unsafe fn vdup_n_u8(value: u8) -> uint8x8_t {
3880+
uint8x8_t(value, value, value, value, value, value, value, value)
3881+
}
3882+
3883+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `uint16x4_t` with all four elements set to `value`.
3884+
#[inline]
3885+
#[target_feature(enable = "neon")]
3886+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3887+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3888+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3889+
pub unsafe fn vdup_n_u16(value: u16) -> uint16x4_t {
3890+
uint16x4_t(value, value, value, value)
3891+
}
3892+
3893+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `uint32x2_t` with both elements set to `value`.
3894+
#[inline]
3895+
#[target_feature(enable = "neon")]
3896+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3897+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3898+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3899+
pub unsafe fn vdup_n_u32(value: u32) -> uint32x2_t {
3900+
uint32x2_t(value, value)
3901+
}
3902+
3903+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `poly8x8_t` with all eight elements set to `value`.
3904+
#[inline]
3905+
#[target_feature(enable = "neon")]
3906+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3907+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
3908+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3909+
pub unsafe fn vdup_n_p8(value: p8) -> poly8x8_t {
3910+
poly8x8_t(value, value, value, value, value, value, value, value)
3911+
}
3912+
3913+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `poly16x4_t` with all four elements set to `value`.
3914+
#[inline]
3915+
#[target_feature(enable = "neon")]
3916+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3917+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
3918+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3919+
pub unsafe fn vdup_n_p16(value: p16) -> poly16x4_t {
3920+
poly16x4_t(value, value, value, value)
3921+
}
3922+
3923+
/// Duplicate vector element to vector or scalar.
///
/// Returns a `float32x2_t` with both elements set to `value`.
3924+
#[inline]
3925+
#[target_feature(enable = "neon")]
3926+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3927+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
3928+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))]
3929+
pub unsafe fn vdup_n_f32(value: f32) -> float32x2_t {
3930+
float32x2_t(value, value)
3931+
}
3932+
37653933
/// Duplicate vector element to vector or scalar
37663934
#[inline]
37673935
#[target_feature(enable = "neon")]
@@ -5533,6 +5701,22 @@ mod tests {
55335701
assert_eq!(r, e);
55345702
}
55355703

5704+
// `vdupq_n_s16(64)` must equal an `i16x8` holding 64 in every element.
#[simd_test(enable = "neon")]
5705+
unsafe fn test_vdupq_n_s16() {
5706+
let v: i16 = 64;
5707+
let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
5708+
let r: i16x8 = transmute(vdupq_n_s16(v));
5709+
assert_eq!(r, e);
5710+
}
5711+
5712+
// `vdupq_n_s32(64)` must equal an `i32x4` holding 64 in every element.
#[simd_test(enable = "neon")]
5713+
unsafe fn test_vdupq_n_s32() {
5714+
let v: i32 = 64;
5715+
let e = i32x4::new(64, 64, 64, 64);
5716+
let r: i32x4 = transmute(vdupq_n_s32(v));
5717+
assert_eq!(r, e);
5718+
}
5719+
55365720
#[simd_test(enable = "neon")]
55375721
unsafe fn test_vdupq_n_u8() {
55385722
let v: u8 = 42;
@@ -5543,6 +5727,120 @@ mod tests {
55435727
assert_eq!(r, e);
55445728
}
55455729

5730+
// `vdupq_n_u16(64)` must equal a `u16x8` holding 64 in every element.
#[simd_test(enable = "neon")]
5731+
unsafe fn test_vdupq_n_u16() {
5732+
let v: u16 = 64;
5733+
let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
5734+
let r: u16x8 = transmute(vdupq_n_u16(v));
5735+
assert_eq!(r, e);
5736+
}
5737+
5738+
// `vdupq_n_u32(64)` must equal a `u32x4` holding 64 in every element.
#[simd_test(enable = "neon")]
5739+
unsafe fn test_vdupq_n_u32() {
5740+
let v: u32 = 64;
5741+
let e = u32x4::new(64, 64, 64, 64);
5742+
let r: u32x4 = transmute(vdupq_n_u32(v));
5743+
assert_eq!(r, e);
5744+
}
5745+
5746+
// `vdupq_n_p8(64)` must equal, once transmuted to `u8x16`, a vector holding 64 in every element.
#[simd_test(enable = "neon")]
5747+
unsafe fn test_vdupq_n_p8() {
5748+
let v: p8 = 64;
5749+
let e = u8x16::new(
5750+
64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
5751+
);
5752+
let r: u8x16 = transmute(vdupq_n_p8(v));
5753+
assert_eq!(r, e);
5754+
}
5755+
5756+
// `vdupq_n_p16(64)` must equal, once transmuted to `u16x8`, a vector holding 64 in every element.
#[simd_test(enable = "neon")]
5757+
unsafe fn test_vdupq_n_p16() {
5758+
let v: p16 = 64;
5759+
let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
5760+
let r: u16x8 = transmute(vdupq_n_p16(v));
5761+
assert_eq!(r, e);
5762+
}
5763+
5764+
// `vdupq_n_f32(64.0)` must equal an `f32x4` holding 64.0 in every element.
#[simd_test(enable = "neon")]
5765+
unsafe fn test_vdupq_n_f32() {
5766+
let v: f32 = 64.0;
5767+
let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
5768+
let r: f32x4 = transmute(vdupq_n_f32(v));
5769+
assert_eq!(r, e);
5770+
}
5771+
5772+
// `vdup_n_s8(64)` must equal an `i8x8` holding 64 in every element.
#[simd_test(enable = "neon")]
5773+
unsafe fn test_vdup_n_s8() {
5774+
let v: i8 = 64;
5775+
let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
5776+
let r: i8x8 = transmute(vdup_n_s8(v));
5777+
assert_eq!(r, e);
5778+
}
5779+
5780+
// `vdup_n_s16(64)` must equal an `i16x4` holding 64 in every element.
#[simd_test(enable = "neon")]
5781+
unsafe fn test_vdup_n_s16() {
5782+
let v: i16 = 64;
5783+
let e = i16x4::new(64, 64, 64, 64);
5784+
let r: i16x4 = transmute(vdup_n_s16(v));
5785+
assert_eq!(r, e);
5786+
}
5787+
5788+
// `vdup_n_s32(64)` must equal an `i32x2` holding 64 in both elements.
#[simd_test(enable = "neon")]
5789+
unsafe fn test_vdup_n_s32() {
5790+
let v: i32 = 64;
5791+
let e = i32x2::new(64, 64);
5792+
let r: i32x2 = transmute(vdup_n_s32(v));
5793+
assert_eq!(r, e);
5794+
}
5795+
5796+
// `vdup_n_u8(42)` must equal a `u8x8` holding 42 in every element.
#[simd_test(enable = "neon")]
5797+
unsafe fn test_vdup_n_u8() {
5798+
let v: u8 = 42;
5799+
let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
5800+
let r: u8x8 = transmute(vdup_n_u8(v));
5801+
assert_eq!(r, e);
5802+
}
5803+
5804+
// `vdup_n_u16(64)` must equal a `u16x4` holding 64 in every element.
#[simd_test(enable = "neon")]
5805+
unsafe fn test_vdup_n_u16() {
5806+
let v: u16 = 64;
5807+
let e = u16x4::new(64, 64, 64, 64);
5808+
let r: u16x4 = transmute(vdup_n_u16(v));
5809+
assert_eq!(r, e);
5810+
}
5811+
5812+
// `vdup_n_u32(64)` must equal a `u32x2` holding 64 in both elements.
#[simd_test(enable = "neon")]
5813+
unsafe fn test_vdup_n_u32() {
5814+
let v: u32 = 64;
5815+
let e = u32x2::new(64, 64);
5816+
let r: u32x2 = transmute(vdup_n_u32(v));
5817+
assert_eq!(r, e);
5818+
}
5819+
5820+
// `vdup_n_p8(64)` must equal, once transmuted to `u8x8`, a vector holding 64 in every element.
#[simd_test(enable = "neon")]
5821+
unsafe fn test_vdup_n_p8() {
5822+
let v: p8 = 64;
5823+
let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
5824+
let r: u8x8 = transmute(vdup_n_p8(v));
5825+
assert_eq!(r, e);
5826+
}
5827+
5828+
// `vdup_n_p16(64)` must equal, once transmuted to `u16x4`, a vector holding 64 in every element.
#[simd_test(enable = "neon")]
5829+
unsafe fn test_vdup_n_p16() {
5830+
let v: p16 = 64;
5831+
let e = u16x4::new(64, 64, 64, 64);
5832+
let r: u16x4 = transmute(vdup_n_p16(v));
5833+
assert_eq!(r, e);
5834+
}
5835+
5836+
// `vdup_n_f32(64.0)` must equal an `f32x2` holding 64.0 in both elements.
#[simd_test(enable = "neon")]
5837+
unsafe fn test_vdup_n_f32() {
5838+
let v: f32 = 64.0;
5839+
let e = f32x2::new(64.0, 64.0);
5840+
let r: f32x2 = transmute(vdup_n_f32(v));
5841+
assert_eq!(r, e);
5842+
}
5843+
55465844
#[simd_test(enable = "neon")]
55475845
unsafe fn test_vmovq_n_u8() {
55485846
let v: u8 = 42;

0 commit comments

Comments
 (0)