Skip to content

NVPTX: non-inlined functions can't be used cross crate #38787

Closed
@japaric

Description

@japaric

Alternatively, all functions should be treated as "inlined" for the nvptx targets.

STR (steps to reproduce)

$ cargo new --lib lib

$ edit lib/src/lib.rs && cat $_
#![no_std]

pub fn foo() -> i32 {
    42
}
$ cargo new --lib kernel && cd $_

$ edit src/lib.rs && cat $_
#![no_std]

extern crate lib;

fn bar() -> i32 {
    lib::foo()
}
$ edit Cargo.toml && tail -n5 $_
[dependencies]
lib = { path = "../lib" }

[profile.dev]
debug = false  # cf. rust-lang/rust#38785
$ edit nvptx64-nvidia-cuda.json && cat $_
{
  "arch": "nvptx64",
  "cpu": "sm_20",
  "data-layout": "e-i64:64-v16:16-v32:32-n16:32:64",
  "llvm-target": "nvptx64-nvidia-cuda",
  "max-atomic-width": 0,
  "os": "cuda",
  "panic-strategy": "abort",
  "target-endian": "little",
  "target-pointer-width": "64"
}
$ cargo install xargo --vers 0.3.0 || true

$ xargo rustc --target nvptx64-nvidia-cuda -- --emit=asm

$ cat $(find target/nvptx64-nvidia-cuda/debug -name '*.s')
.version 3.2
.target sm_20
.address_size 64

.extern .func  (.param .b32 func_retval0) _ZN3lib3foo17h3feefc42e145764bE
()
;

.func  (.param .b32 func_retval0) _ZN6kernel3bar17h7b762ababaef9f36E()
{
        .reg .s32       %r<2>;

        bra.uni         LBB0_1;
LBB0_1:
        { // callseq 0
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        _ZN3lib3foo17h3feefc42e145764bE,
        (
        );
        ld.param.b32    %r1, [retval0+0];
        } // callseq 0
        bra.uni         LBB0_2;
LBB0_2:
        st.param.b32    [func_retval0+0], %r1;
        ret;
}

Note that foo is only declared (.extern .func), not defined, in the PTX module.

Workaround

Mark lib::foo as #[inline]. Then you'll get the translation of foo in the final PTX module:

.version 3.2
.target sm_20
.address_size 64


.func  (.param .b32 func_retval0) _ZN3lib3foo17h3feefc42e145764bE()
{
        .reg .s32       %r<2>;

        bra.uni         LBB0_1;
LBB0_1:
        mov.u32         %r1, 42;
        st.param.b32    [func_retval0+0], %r1;
        ret;
}

.func  (.param .b32 func_retval0) _ZN6kernel3bar17h7b762ababaef9f36E()
{
        .reg .s32       %r<2>;

        bra.uni         LBB1_1;
LBB1_1:
        { // callseq 0
        .reg .b32 temp_param_reg;
        .param .b32 retval0;
        call.uni (retval0),
        _ZN3lib3foo17h3feefc42e145764bE,
        (
        );
        ld.param.b32    %r1, [retval0+0];
        } // callseq 0
        bra.uni         LBB1_2;
LBB1_2:
        st.param.b32    [func_retval0+0], %r1;
        ret;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bug (Category: This is a bug.) · O-NVPTX (Target: the NVPTX LLVM backend for running rust on GPUs, https://llvm.org/docs/NVPTXUsage.html)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions