-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV] Use ForceTailAgnostic for masked vmsbf/vmsif/vmsof.m. #94532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
These instructions use the mask policy, but always update the destination under tail agnostic policy.
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) ChangesThese instructions use the mask policy, but always update the destination under tail agnostic policy. Full diff: https://github.com/llvm/llvm-project/pull/94532.diff 4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 5fe5a7a5bd5cc..222909d24e539 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2059,6 +2059,7 @@ multiclass VPseudoVSFS_M {
def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>,
SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
forceMergeOpRead=true>;
+ let ForceTailAgnostic = true in
def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>,
SchedUnary<"WriteVMSFSV", "ReadVMSFSV", mx,
forceMergeOpRead=true>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
index 14a1f084c3985..d1f344d52763d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsbf_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsbf_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsbf_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsbf_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsbf.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsbf_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsbf_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsbf_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
index 05d402afc934c..1dc52eb55455b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsif.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsif.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
index 0c60681ea8de0..b0a28e6e455b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
@@ -33,7 +33,7 @@ define <vscale x 1 x i1> @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -75,7 +75,7 @@ define <vscale x 2 x i1> @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -117,7 +117,7 @@ define <vscale x 4 x i1> @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -159,9 +159,9 @@ define <vscale x 8 x i1> @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1>
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i1> @llvm.riscv.vmsof.mask.nxv8i1(
@@ -201,7 +201,7 @@ define <vscale x 16 x i1> @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1(<vscale x 16 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -243,7 +243,7 @@ define <vscale x 32 x i1> @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1(<vscale x 32 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
@@ -285,7 +285,7 @@ define <vscale x 64 x i1> @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1(<vscale x 64 x
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
; CHECK-NEXT: vmsof.m v10, v8, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
; CHECK-NEXT: vmsbf.m v10, v8, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmv.v.v v0, v10
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this a regression? Because vmv.v.v depends on vtype/vl, while vmv1r.v doesn't.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably depends on the microarchitecture. If VL is less than vlmax, then it can save copying some elements on some microarchitectures. RISCVInstrInfo::copyPhysRegVector
turns vmv1r.v into vmv.v.v.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I see, I forgot it. Thanks!
These instructions use the mask policy, but always update the destination under tail agnostic policy.