Skip to content

[LoongArch] Set isReMaterializable on LU{12,32,52}I.D/ADDI.D and {X}ORI instructions #94552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 7, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented Jun 6, 2024

No description provided.

@heiher heiher requested review from wangleiat and SixWeining June 6, 2024 01:54
@heiher heiher self-assigned this Jun 6, 2024
@llvmbot
Copy link
Member

llvmbot commented Jun 6, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/94552.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+9-1)
  • (modified) llvm/test/CodeGen/LoongArch/vector-fp-imm.ll (+34-39)
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 66bd74e068b95..6b6641aef9030 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -734,6 +734,7 @@ def ADD_W : ALU_3R<0x00100000>;
 def SUB_W : ALU_3R<0x00110000>;
 def ADDI_W : ALU_2RI12<0x02800000, simm12_addlike>;
 def ALSL_W : ALU_3RI2<0x00040000, uimm2_plus1>;
+let isReMaterializable = 1 in
 def LU12I_W : ALU_1RI20<0x14000000, simm20_lu12iw>;
 def SLT  : ALU_3R<0x00120000>;
 def SLTU : ALU_3R<0x00128000>;
@@ -749,8 +750,10 @@ def XOR  : ALU_3R<0x00158000>;
 def ANDN : ALU_3R<0x00168000>;
 def ORN  : ALU_3R<0x00160000>;
 def ANDI : ALU_2RI12<0x03400000, uimm12>;
+let isReMaterializable = 1 in {
 def ORI  : ALU_2RI12<0x03800000, uimm12_ori>;
 def XORI : ALU_2RI12<0x03c00000, uimm12>;
+}
 def MUL_W   : ALU_3R<0x001c0000>;
 def MULH_W  : ALU_3R<0x001c8000>;
 def MULH_WU : ALU_3R<0x001d0000>;
@@ -852,16 +855,21 @@ let Predicates = [IsLA64] in {
 // Arithmetic Operation Instructions for 64-bits
 def ADD_D : ALU_3R<0x00108000>;
 def SUB_D : ALU_3R<0x00118000>;
+// ADDI_D isn't always rematerializable, but isReMaterializable will be used as
+// a hint which is verified in isReallyTriviallyReMaterializable.
+let isReMaterializable = 1 in
 def ADDI_D : ALU_2RI12<0x02c00000, simm12_addlike>;
 def ADDU16I_D : ALU_2RI16<0x10000000, simm16>;
 def ALSL_WU : ALU_3RI2<0x00060000, uimm2_plus1>;
 def ALSL_D  : ALU_3RI2<0x002c0000, uimm2_plus1>;
 let Constraints = "$rd = $dst" in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+    isReMaterializable = 1 in
 def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst),
                        (ins GPR:$rd, simm20_lu32id:$imm20),
                        "$rd, $imm20">;
 }
+let isReMaterializable = 1 in
 def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>;
 def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>;
 def MUL_D     : ALU_3R<0x001d8000>;
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 18d17751a7719..0a401ebe5f6b2 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -742,12 +742,10 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
 ; LA32F-NEXT:    st.w $a2, $sp, 32 # 4-byte Folded Spill
 ; LA32F-NEXT:    ld.w $a2, $a0, 52
 ; LA32F-NEXT:    st.w $a2, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 40
-; LA32F-NEXT:    st.w $a2, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $s8, $a0, 40
 ; LA32F-NEXT:    ld.w $a2, $a0, 44
-; LA32F-NEXT:    st.w $a2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    ld.w $a2, $a0, 32
-; LA32F-NEXT:    st.w $a2, $sp, 0 # 4-byte Folded Spill
+; LA32F-NEXT:    st.w $a2, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT:    ld.w $s3, $a0, 32
 ; LA32F-NEXT:    ld.w $s4, $a0, 36
 ; LA32F-NEXT:    ld.w $s5, $a0, 24
 ; LA32F-NEXT:    ld.w $s6, $a0, 28
@@ -756,80 +754,77 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
 ; LA32F-NEXT:    ld.w $s7, $a0, 8
 ; LA32F-NEXT:    ld.w $s0, $a0, 12
 ; LA32F-NEXT:    ld.w $a2, $a0, 0
-; LA32F-NEXT:    ld.w $a3, $a0, 4
+; LA32F-NEXT:    ld.w $a4, $a0, 4
 ; LA32F-NEXT:    move $fp, $a1
-; LA32F-NEXT:    lu12i.w $s8, 261888
+; LA32F-NEXT:    lu12i.w $a3, 261888
 ; LA32F-NEXT:    move $a0, $a2
-; LA32F-NEXT:    move $a1, $a3
+; LA32F-NEXT:    move $a1, $a4
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s8
 ; LA32F-NEXT:    bl %plt(__adddf3)
 ; LA32F-NEXT:    st.w $a0, $sp, 40 # 4-byte Folded Spill
 ; LA32F-NEXT:    st.w $a1, $sp, 36 # 4-byte Folded Spill
-; LA32F-NEXT:    lu12i.w $s3, 262144
+; LA32F-NEXT:    lu12i.w $a3, 262144
 ; LA32F-NEXT:    move $a0, $s7
 ; LA32F-NEXT:    move $a1, $s0
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s3
+; LA32F-NEXT:    move $s0, $a3
 ; LA32F-NEXT:    bl %plt(__adddf3)
 ; LA32F-NEXT:    st.w $a0, $sp, 24 # 4-byte Folded Spill
 ; LA32F-NEXT:    st.w $a1, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT:    lu12i.w $s0, 262272
+; LA32F-NEXT:    lu12i.w $s7, 262272
 ; LA32F-NEXT:    move $a0, $s1
 ; LA32F-NEXT:    move $a1, $s2
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s0
+; LA32F-NEXT:    move $a3, $s7
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    st.w $a0, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT:    st.w $a1, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT:    lu12i.w $s7, 262400
+; LA32F-NEXT:    st.w $a0, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    move $s2, $a1
+; LA32F-NEXT:    lu12i.w $a3, 262400
 ; LA32F-NEXT:    move $a0, $s5
 ; LA32F-NEXT:    move $a1, $s6
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s7
 ; LA32F-NEXT:    bl %plt(__adddf3)
 ; LA32F-NEXT:    move $s5, $a0
 ; LA32F-NEXT:    move $s6, $a1
-; LA32F-NEXT:    ld.w $a0, $sp, 0 # 4-byte Folded Reload
+; LA32F-NEXT:    move $a0, $s3
 ; LA32F-NEXT:    move $a1, $s4
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s8
+; LA32F-NEXT:    lu12i.w $a3, 261888
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s4, $a0
-; LA32F-NEXT:    move $s8, $a1
-; LA32F-NEXT:    ld.w $a0, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT:    ld.w $a1, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    move $s3, $a0
+; LA32F-NEXT:    move $s4, $a1
+; LA32F-NEXT:    move $a0, $s8
+; LA32F-NEXT:    ld.w $a1, $sp, 16 # 4-byte Folded Reload
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s3
+; LA32F-NEXT:    move $a3, $s0
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s3, $a0
-; LA32F-NEXT:    move $s1, $a1
+; LA32F-NEXT:    move $s8, $a0
+; LA32F-NEXT:    move $s0, $a1
 ; LA32F-NEXT:    ld.w $a0, $sp, 32 # 4-byte Folded Reload
 ; LA32F-NEXT:    ld.w $a1, $sp, 28 # 4-byte Folded Reload
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s0
+; LA32F-NEXT:    move $a3, $s7
 ; LA32F-NEXT:    bl %plt(__adddf3)
-; LA32F-NEXT:    move $s0, $a0
-; LA32F-NEXT:    move $s2, $a1
+; LA32F-NEXT:    move $s7, $a0
+; LA32F-NEXT:    move $s1, $a1
 ; LA32F-NEXT:    ld.w $a0, $sp, 48 # 4-byte Folded Reload
 ; LA32F-NEXT:    ld.w $a1, $sp, 44 # 4-byte Folded Reload
 ; LA32F-NEXT:    move $a2, $zero
-; LA32F-NEXT:    move $a3, $s7
+; LA32F-NEXT:    lu12i.w $a3, 262400
 ; LA32F-NEXT:    bl %plt(__adddf3)
 ; LA32F-NEXT:    st.w $a0, $fp, 56
 ; LA32F-NEXT:    st.w $a1, $fp, 60
-; LA32F-NEXT:    st.w $s0, $fp, 48
-; LA32F-NEXT:    st.w $s2, $fp, 52
-; LA32F-NEXT:    st.w $s3, $fp, 40
-; LA32F-NEXT:    st.w $s1, $fp, 44
-; LA32F-NEXT:    st.w $s4, $fp, 32
-; LA32F-NEXT:    st.w $s8, $fp, 36
+; LA32F-NEXT:    st.w $s7, $fp, 48
+; LA32F-NEXT:    st.w $s1, $fp, 52
+; LA32F-NEXT:    st.w $s8, $fp, 40
+; LA32F-NEXT:    st.w $s0, $fp, 44
+; LA32F-NEXT:    st.w $s3, $fp, 32
+; LA32F-NEXT:    st.w $s4, $fp, 36
 ; LA32F-NEXT:    st.w $s5, $fp, 24
 ; LA32F-NEXT:    st.w $s6, $fp, 28
-; LA32F-NEXT:    ld.w $a0, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT:    ld.w $a0, $sp, 12 # 4-byte Folded Reload
 ; LA32F-NEXT:    st.w $a0, $fp, 16
-; LA32F-NEXT:    ld.w $a0, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT:    st.w $a0, $fp, 20
+; LA32F-NEXT:    st.w $s2, $fp, 20
 ; LA32F-NEXT:    ld.w $a0, $sp, 24 # 4-byte Folded Reload
 ; LA32F-NEXT:    st.w $a0, $fp, 8
 ; LA32F-NEXT:    ld.w $a0, $sp, 20 # 4-byte Folded Reload

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this patch only touch LA32 tests? Are there any LA64 tests that benefit from it?

@heiher
Copy link
Member Author

heiher commented Jun 6, 2024

Does this patch only touch LA32 tests? Are there any LA64 tests that benefit from it?

Not only LA32; LA64 benefits as well. In a Rust benchmark case, an instruction that generates an operand for a conditional branch can be hoisted by LICM, which depends on isTriviallyReMaterializable. (I can't reproduce it in a simple case, though. :( )

// Rematerializable instructions should always be hoisted providing the
// register allocator can just pull them down again when needed.
if (isTriviallyReMaterializable(MI))
  return true;

@heiher heiher merged commit f21c2fa into llvm:main Jun 7, 2024
7 checks passed
@heiher heiher deleted the rematerializable branch June 7, 2024 07:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants