Closed
Description
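Non-inlined functions from a dependency crate are not emitted into the final PTX module of the crate that calls them. Either they should be, or all functions should be treated as "inlined" (`#[inline]`) for the nvptx targets.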
Steps to reproduce (STR)
$ cargo new --lib lib
$ edit lib/src/lib.rs && cat $_
#![no_std]
pub fn foo() -> i32 {
42
}
$ cargo new --lib kernel && cd $_
$ edit src/lib.rs && cat $_
#![no_std]
extern crate lib;
fn bar() -> i32 {
lib::foo()
}
$ edit Cargo.toml && tail -n5 $_
[dependencies]
lib = { path = "../lib" }
[profile.dev]
debug = false # cf. rust-lang/rust#38785
$ edit nvptx64-nvidia-cuda.json && cat $_
{
"arch": "nvptx64",
"cpu": "sm_20",
"data-layout": "e-i64:64-v16:16-v32:32-n16:32:64",
"llvm-target": "nvptx64-nvidia-cuda",
"max-atomic-width": 0,
"os": "cuda",
"panic-strategy": "abort",
"target-endian": "little",
"target-pointer-width": "64"
}
$ cargo install xargo --vers 0.3.0 || true
$ xargo rustc --target nvptx64-nvidia-cuda -- --emit=asm
$ cat $(find target/nvptx64-nvidia-cuda/debug -name '*.s')
.version 3.2
.target sm_20
.address_size 64
.extern .func (.param .b32 func_retval0) _ZN3lib3foo17h3feefc42e145764bE
()
;
.func (.param .b32 func_retval0) _ZN6kernel3bar17h7b762ababaef9f36E()
{
.reg .s32 %r<2>;
bra.uni LBB0_1;
LBB0_1:
{ // callseq 0
.reg .b32 temp_param_reg;
.param .b32 retval0;
call.uni (retval0),
_ZN3lib3foo17h3feefc42e145764bE,
(
);
ld.param.b32 %r1, [retval0+0];
} // callseq 0
bra.uni LBB0_2;
LBB0_2:
st.param.b32 [func_retval0+0], %r1;
ret;
}
Note that `foo` appears as undefined (`.extern .func`) in the PTX module.
Workaround
Mark `lib::foo` as `#[inline]`. Then you'll get the translation of `foo` in the final PTX module:
.version 3.2
.target sm_20
.address_size 64
.func (.param .b32 func_retval0) _ZN3lib3foo17h3feefc42e145764bE()
{
.reg .s32 %r<2>;
bra.uni LBB0_1;
LBB0_1:
mov.u32 %r1, 42;
st.param.b32 [func_retval0+0], %r1;
ret;
}
.func (.param .b32 func_retval0) _ZN6kernel3bar17h7b762ababaef9f36E()
{
.reg .s32 %r<2>;
bra.uni LBB1_1;
LBB1_1:
{ // callseq 0
.reg .b32 temp_param_reg;
.param .b32 retval0;
call.uni (retval0),
_ZN3lib3foo17h3feefc42e145764bE,
(
);
ld.param.b32 %r1, [retval0+0];
} // callseq 0
bra.uni LBB1_2;
LBB1_2:
st.param.b32 [func_retval0+0], %r1;
ret;
}