Skip to content

Commit cd8fcad

Browse files
authored
Unrolled build for rust-lang#125980
Rollup merge of rust-lang#125980 - kjetilkjeka:nvptx_remove_direct_passmode, r=davidtwco Nvptx remove direct passmode This PR does what should have been done in rust-lang#117671. That is, it fully avoids using `PassMode::Direct` for `extern "C" fn` on `nvptx64-nvidia-cuda` and enables the compatibility test. `@RalfJung` [pointed me in the right direction](rust-lang#117480 (comment)) for solving this issue. There will still be some ABI bugs after this PR is merged. These ABI tests are created based on what is actually correct, and since they continue passing with even more of them enabled, things are improving. I don't have the time to tackle all the remaining issues right now, but I think getting these improvements merged is very valuable in itself, and I plan to tackle more of the remaining issues long term. This PR also doesn't remove the use of `PassMode::Direct` for `extern "ptx-kernel" fn`. Removing it there was not trivial to make work, and since that ABI is hidden behind an unstable feature, it's less urgent. I don't know if it's correct to request `@RalfJung` as a reviewer (due to team structures), but he helped me a lot to figure out this stuff. If that's not appropriate, then `@davidtwco` would be a good candidate since he knows about this topic from rust-lang#117671. r​? `@RalfJung`
2 parents 02c7a59 + a49fe0a commit cd8fcad

File tree

5 files changed

+73
-16
lines changed

5 files changed

+73
-16
lines changed

compiler/rustc_target/src/abi/call/nvptx64.rs

+39-8
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,54 @@
11
use crate::abi::call::{ArgAbi, FnAbi, PassMode, Reg, Size, Uniform};
22
use crate::abi::{HasDataLayout, TyAbiInterface};
33

4+
use super::{ArgAttribute, ArgAttributes, ArgExtension, CastTarget};
5+
46
fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
5-
if ret.layout.is_aggregate() && ret.layout.size.bits() > 64 {
6-
ret.make_indirect();
7-
} else {
8-
// FIXME: this is wrong! Need to decide which ABI we really want here.
9-
ret.make_direct_deprecated();
7+
if ret.layout.is_aggregate() && ret.layout.is_sized() {
8+
classify_aggregate(ret)
9+
} else if ret.layout.size.bits() < 32 && ret.layout.is_sized() {
10+
ret.extend_integer_width_to(32);
1011
}
1112
}
1213

1314
fn classify_arg<Ty>(arg: &mut ArgAbi<'_, Ty>) {
14-
if arg.layout.is_aggregate() {
15-
arg.make_indirect_byval(None);
16-
} else if arg.layout.size.bits() < 32 {
15+
if arg.layout.is_aggregate() && arg.layout.is_sized() {
16+
classify_aggregate(arg)
17+
} else if arg.layout.size.bits() < 32 && arg.layout.is_sized() {
1718
arg.extend_integer_width_to(32);
1819
}
1920
}
2021

22+
/// Selects the pass mode used for sized aggregates in both argument and return position.
23+
fn classify_aggregate<Ty>(arg: &mut ArgAbi<'_, Ty>) {
24+
let align_bytes = arg.layout.align.abi.bytes();
25+
let size = arg.layout.size;
26+
27+
let reg = match align_bytes {
28+
1 => Reg::i8(),
29+
2 => Reg::i16(),
30+
4 => Reg::i32(),
31+
8 => Reg::i64(),
32+
16 => Reg::i128(),
33+
_ => unreachable!("Align is given as power of 2 no larger than 16 bytes"),
34+
};
35+
36+
if align_bytes == size.bytes() {
37+
arg.cast_to(CastTarget {
38+
prefix: [Some(reg), None, None, None, None, None, None, None],
39+
rest: Uniform::new(Reg::i8(), Size::from_bytes(0)),
40+
attrs: ArgAttributes {
41+
regular: ArgAttribute::default(),
42+
arg_ext: ArgExtension::None,
43+
pointee_size: Size::ZERO,
44+
pointee_align: None,
45+
},
46+
});
47+
} else {
48+
arg.cast_to(Uniform::new(reg, size));
49+
}
50+
}
51+
2152
fn classify_arg_kernel<'a, Ty, C>(_cx: &C, arg: &mut ArgAbi<'a, Ty>)
2253
where
2354
Ty: TyAbiInterface<'a, C> + Copy,

tests/assembly/nvptx-c-abi-arg-v7.rs

+10
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ pub struct TripleU16 {
4040
h: u16,
4141
}
4242
#[repr(C)]
43+
pub struct DoubleI32 {
44+
f: i32,
45+
g: i32,
46+
}
47+
#[repr(C)]
4348
pub struct TripleU32 {
4449
f: u32,
4550
g: u32,
@@ -175,6 +180,11 @@ pub unsafe extern "C" fn f_triple_u16_arg(_a: TripleU16) {}
175180
#[no_mangle]
176181
pub unsafe extern "C" fn f_triple_u32_arg(_a: TripleU32) {}
177182

183+
// CHECK: .visible .func f_double_i32_arg(
184+
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
185+
#[no_mangle]
186+
pub unsafe extern "C" fn f_double_i32_arg(_a: DoubleI32) {}
187+
178188
// CHECK: .visible .func f_triple_u64_arg(
179189
// CHECK: .param .align 8 .b8 f_triple_u64_arg_param_0[24]
180190
#[no_mangle]

tests/assembly/nvptx-c-abi-ret-v7.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//@ assembly-output: ptx-linker
22
//@ compile-flags: --crate-type cdylib -C target-cpu=sm_86 -Z unstable-options -Clinker-flavor=llbc
33
//@ only-nvptx64
4-
//@ ignore-nvptx64
54

65
// The PTX ABI stability is tied to major versions of the PTX ISA
76
// These tests assume major version 7
@@ -41,6 +40,11 @@ pub struct TripleU16 {
4140
h: u16,
4241
}
4342
#[repr(C)]
43+
pub struct DoubleI32 {
44+
f: i32,
45+
g: i32,
46+
}
47+
#[repr(C)]
4448
pub struct TripleU32 {
4549
f: u32,
4650
g: u32,
@@ -187,6 +191,12 @@ pub unsafe extern "C" fn f_triple_u16_ret() -> TripleU16 {
187191
TripleU16 { f: 18, g: 19, h: 20 }
188192
}
189193

194+
// CHECK: .visible .func (.param .align 4 .b8 func_retval0[8]) f_double_i32_ret(
195+
#[no_mangle]
196+
pub unsafe extern "C" fn f_double_i32_ret() -> DoubleI32 {
197+
DoubleI32 { f: 1, g: 2 }
198+
}
199+
190200
// CHECK: .visible .func (.param .align 4 .b8 func_retval0[12]) f_triple_u32_ret(
191201
#[no_mangle]
192202
pub unsafe extern "C" fn f_triple_u32_ret() -> TripleU32 {

tests/assembly/nvptx-kernel-abi/nvptx-kernel-args-abi-v7.rs

+10
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ pub struct TripleU16 {
5050
h: u16,
5151
}
5252
#[repr(C)]
53+
pub struct DoubleI32 {
54+
f: i32,
55+
g: i32,
56+
}
57+
#[repr(C)]
5358
pub struct TripleU32 {
5459
f: u32,
5560
g: u32,
@@ -180,6 +185,11 @@ pub unsafe extern "ptx-kernel" fn f_triple_u8_arg(_a: TripleU8) {}
180185
#[no_mangle]
181186
pub unsafe extern "ptx-kernel" fn f_triple_u16_arg(_a: TripleU16) {}
182187

188+
// CHECK: .visible .entry f_double_i32_arg(
189+
// CHECK: .param .align 4 .b8 f_double_i32_arg_param_0[8]
190+
#[no_mangle]
191+
pub unsafe extern "ptx-kernel" fn f_double_i32_arg(_a: DoubleI32) {}
192+
183193
// CHECK: .visible .entry f_triple_u32_arg(
184194
// CHECK: .param .align 4 .b8 f_triple_u32_arg_param_0[12]
185195
#[no_mangle]

tests/ui/abi/compatibility.rs

+3-7
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,9 @@
5555
//@ revisions: csky
5656
//@[csky] compile-flags: --target csky-unknown-linux-gnuabiv2
5757
//@[csky] needs-llvm-components: csky
58-
59-
// FIXME: disabled on nvptx64 since the target ABI fails the sanity check
60-
// see https://github.com/rust-lang/rust/issues/117480
61-
/* revisions: nvptx64
62-
[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
63-
[nvptx64] needs-llvm-components: nvptx
64-
*/
58+
//@ revisions: nvptx64
59+
//@[nvptx64] compile-flags: --target nvptx64-nvidia-cuda
60+
//@[nvptx64] needs-llvm-components: nvptx
6561
#![feature(rustc_attrs, unsized_fn_params, transparent_unions)]
6662
#![cfg_attr(not(host), feature(no_core, lang_items), no_std, no_core)]
6763
#![allow(unused, improper_ctypes_definitions, internal_features)]

0 commit comments

Comments
 (0)