Skip to content

[LoongArch] SIMD intrinsics not fully inlined in caller with target feature globally enabled #133281

Closed
@heiher

Description

@heiher

I tried this code:

#![feature(stdarch_loongarch)]
use std::arch::loongarch64::*;

pub unsafe fn simd(s: i32) -> i32 {
    lsx_vpickve2gr_b::<0>(lsx_vreplgr2vr_b(s))
}
rustc --crate-type lib -C opt-level=3 --emit llvm-ir -o lsx.ll lsx.rs

I expected to see this happen:

The lsx intrinsics are inlined within simd functions when the lsx target feature is globally enabled.

; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #0 {
start:
  %_2 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %s) #2
  %_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %_2, i32 noundef 0) #2
  ret i32 %_0
}

; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #1

; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #1

attributes #0 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }

Instead, this happened:

; core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: read) uwtable
define internal fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef readonly align 16 dereferenceable(16) %a) unnamed_addr #0 {
start:
  %0 = load <16 x i8>, ptr %a, align 16
  %_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %0, i32 noundef 0) #4
  ret i32 %_0
}

; core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: write) uwtable
define internal fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr dead_on_unwind noalias nocapture noundef writable writeonly align 16 dereferenceable(16) %_0, i32 noundef signext %a) unnamed_addr #1 {
start:
  %0 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %a) #4
  store <16 x i8> %0, ptr %_0, align 16
  ret void
}

; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #2 {
start:
  %0 = alloca [16 x i8], align 16
; call core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
  call fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0, i32 noundef signext %s)
; call core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
  %_0 = call fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0)
  ret i32 %_0
}

; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #3

; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #3

attributes #0 = { inlinehint nofree nosync nounwind memory(argmem: read) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #1 = { inlinehint nofree nosync nounwind memory(argmem: write) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #2 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx" }

Meta

rustc --version --verbose:

rustc 1.84.0-nightly (3fee0f12e 2024-11-20)
binary: rustc
commit-hash: 3fee0f12e4f595948f8f54f57c8b7a7a58127124
commit-date: 2024-11-20
host: loongarch64-unknown-linux-gnu
release: 1.84.0-nightly
LLVM version: 19.1.3

rustc -Z unstable-options --print target-spec-json:

{
  "arch": "loongarch64",
  "code-model": "medium",
  "crt-objects-fallback": "false",
  "crt-static-respected": true,
  "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128",
  "direct-access-external-data": false,
  "dynamic-linking": true,
  "env": "gnu",
  "features": "+f,+d,+lsx",
  "has-rpath": true,
  "has-thread-local": true,
  "linker-flavor": "gnu-cc",
  "llvm-abiname": "lp64d",
  "llvm-target": "loongarch64-unknown-linux-gnu",
  "max-atomic-width": 64,
  "metadata": {
    "description": "LoongArch64 Linux, LP64D ABI (kernel 5.19, glibc 2.36)",
    "host_tools": true,
    "std": true,
    "tier": 2
  },
  "os": "linux",
  "position-independent-executables": true,
  "relro-level": "full",
  "supported-sanitizers": [
    "address",
    "leak",
    "memory",
    "thread",
    "cfi"
  ],
  "supported-split-debuginfo": [
    "packed",
    "unpacked",
    "off"
  ],
  "supports-xray": true,
  "target-family": [
    "unix"
  ],
  "target-pointer-width": "64"
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVMArea: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.A-SIMDArea: SIMD (Single Instruction Multiple Data)C-bugCategory: This is a bug.O-loongarchTarget: LoongArch (LA32R, LA32S, LA64)T-compilerRelevant to the compiler team, which will review and decide on the PR/issue.llvm-fixed-upstreamIssue expected to be fixed by the next major LLVM upgrade, or backported fixes

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions