Closed
Description
I tried this code:
#![feature(stdarch_loongarch)]
use std::arch::loongarch64::*;
/// Minimal reproducer for the issue: chains two LSX intrinsics from
/// `std::arch::loongarch64`.
///
/// `lsx_vreplgr2vr_b(s)` produces a 16 x i8 vector from `s` (per the emitted
/// IR quoted in this report), and `lsx_vpickve2gr_b::<0>` is then applied to
/// that vector with constant index 0, yielding the returned `i32`.
///
/// # Safety
///
/// NOTE(review): presumably the caller must ensure the `lsx` target feature is
/// available on the executing CPU — confirm against the intrinsic docs.
pub unsafe fn simd(s: i32) -> i32 {
lsx_vpickve2gr_b::<0>(lsx_vreplgr2vr_b(s))
}
rustc --crate-type lib -C opt-level=3 --emit llvm-ir -o lsx.ll lsx.rs
I expected to see this happen:
The `lsx` intrinsics are inlined within `simd` functions when the `lsx` target feature is globally enabled.
; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #0 {
start:
%_2 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %s) #2
%_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %_2, i32 noundef 0) #2
ret i32 %_0
}
; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #1
; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #1
attributes #0 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
Instead, this happened:
; core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: read) uwtable
define internal fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef readonly align 16 dereferenceable(16) %a) unnamed_addr #0 {
start:
%0 = load <16 x i8>, ptr %a, align 16
%_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %0, i32 noundef 0) #4
ret i32 %_0
}
; core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: write) uwtable
define internal fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr dead_on_unwind noalias nocapture noundef writable writeonly align 16 dereferenceable(16) %_0, i32 noundef signext %a) unnamed_addr #1 {
start:
%0 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %a) #4
store <16 x i8> %0, ptr %_0, align 16
ret void
}
; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #2 {
start:
%0 = alloca [16 x i8], align 16
; call core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
call fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0, i32 noundef signext %s)
; call core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
%_0 = call fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0)
ret i32 %_0
}
; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #3
; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #3
attributes #0 = { inlinehint nofree nosync nounwind memory(argmem: read) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #1 = { inlinehint nofree nosync nounwind memory(argmem: write) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #2 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx" }
Meta
`rustc --version --verbose`:
rustc 1.84.0-nightly (3fee0f12e 2024-11-20)
binary: rustc
commit-hash: 3fee0f12e4f595948f8f54f57c8b7a7a58127124
commit-date: 2024-11-20
host: loongarch64-unknown-linux-gnu
release: 1.84.0-nightly
LLVM version: 19.1.3
`rustc -Z unstable-options --print target-spec-json`:
{
"arch": "loongarch64",
"code-model": "medium",
"crt-objects-fallback": "false",
"crt-static-respected": true,
"data-layout": "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128",
"direct-access-external-data": false,
"dynamic-linking": true,
"env": "gnu",
"features": "+f,+d,+lsx",
"has-rpath": true,
"has-thread-local": true,
"linker-flavor": "gnu-cc",
"llvm-abiname": "lp64d",
"llvm-target": "loongarch64-unknown-linux-gnu",
"max-atomic-width": 64,
"metadata": {
"description": "LoongArch64 Linux, LP64D ABI (kernel 5.19, glibc 2.36)",
"host_tools": true,
"std": true,
"tier": 2
},
"os": "linux",
"position-independent-executables": true,
"relro-level": "full",
"supported-sanitizers": [
"address",
"leak",
"memory",
"thread",
"cfi"
],
"supported-split-debuginfo": [
"packed",
"unpacked",
"off"
],
"supports-xray": true,
"target-family": [
"unix"
],
"target-pointer-width": "64"
}
Metadata
Metadata
Assignees
Labels
- Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
- Area: SIMD (Single Instruction Multiple Data)
- Category: This is a bug.
- Target: LoongArch (LA32R, LA32S, LA64)
- Relevant to the compiler team, which will review and decide on the PR/issue.
- Issue expected to be fixed by the next major LLVM upgrade, or backported fixes