@@ -5,7 +5,7 @@ use rustc_codegen_ssa::traits::BuilderMethods;
5
5
6
6
use crate :: { context:: CodegenCx , builder:: Builder } ;
7
7
8
- pub fn adjust_intrinsic_arguments < ' a , ' b , ' gcc , ' tcx > ( builder : & Builder < ' a , ' gcc , ' tcx > , gcc_func : FunctionPtrType < ' gcc > , mut args : Cow < ' b , [ RValue < ' gcc > ] > , func_name : & str ) -> Cow < ' b , [ RValue < ' gcc > ] > {
8
+ pub fn adjust_intrinsic_arguments < ' a , ' b , ' gcc , ' tcx > ( builder : & Builder < ' a , ' gcc , ' tcx > , gcc_func : FunctionPtrType < ' gcc > , mut args : Cow < ' b , [ RValue < ' gcc > ] > , func_name : & str , original_function_name : Option < & String > ) -> Cow < ' b , [ RValue < ' gcc > ] > {
9
9
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
10
10
// arguments here.
11
11
if gcc_func. get_param_count ( ) != args. len ( ) {
@@ -277,11 +277,23 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
277
277
let c = builder. context . new_rvalue_from_vector ( None , arg3_type, & [ new_args[ 2 ] ; 2 ] ) ;
278
278
args = vec ! [ a, b, c, new_args[ 3 ] ] . into ( ) ;
279
279
} ,
280
- "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256" => {
281
- let mut new_args = args. to_vec ( ) ;
282
- let arg3 = & mut new_args[ 2 ] ;
283
- * arg3 = builder. context . new_unary_op ( None , UnaryOp :: Minus , arg3. get_type ( ) , * arg3) ;
284
- args = new_args. into ( ) ;
280
+ "__builtin_ia32_vfmaddsubpd256" | "__builtin_ia32_vfmaddsubps" | "__builtin_ia32_vfmaddsubps256"
281
+ | "__builtin_ia32_vfmaddsubpd" => {
282
+ if let Some ( original_function_name) = original_function_name {
283
+ match & * * original_function_name {
284
+ "llvm.x86.fma.vfmsubadd.pd.256" | "llvm.x86.fma.vfmsubadd.ps" | "llvm.x86.fma.vfmsubadd.ps.256"
285
+ | "llvm.x86.fma.vfmsubadd.pd" => {
286
+ // NOTE: since both llvm.x86.fma.vfmsubadd.ps and llvm.x86.fma.vfmaddsub.ps map to
287
+ // __builtin_ia32_vfmaddsubps, only add minus if this comes from a
288
+ // subadd LLVM intrinsic, e.g. _mm256_fmsubadd_pd.
289
+ let mut new_args = args. to_vec ( ) ;
290
+ let arg3 = & mut new_args[ 2 ] ;
291
+ * arg3 = builder. context . new_unary_op ( None , UnaryOp :: Minus , arg3. get_type ( ) , * arg3) ;
292
+ args = new_args. into ( ) ;
293
+ } ,
294
+ _ => ( ) ,
295
+ }
296
+ }
285
297
} ,
286
298
"__builtin_ia32_ldmxcsr" => {
287
299
// The builtin __builtin_ia32_ldmxcsr takes an integer value while llvm.x86.sse.ldmxcsr takes a pointer,
@@ -583,6 +595,8 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
583
595
"llvm.fshr.v32i16" => "__builtin_ia32_vpshrdv_v32hi" ,
584
596
"llvm.fshr.v16i16" => "__builtin_ia32_vpshrdv_v16hi" ,
585
597
"llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi" ,
598
+ "llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3" ,
599
+ "llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3" ,
586
600
587
601
// The above doc points to unknown builtins for the following, so override them:
588
602
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si" ,
0 commit comments