-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LoongArch] Set isReMaterializable on LU{12,32,52}I.D/ADDI.D and {X}ORI instructions #94552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: hev (heiher) Changes Full diff: https://github.com/llvm/llvm-project/pull/94552.diff 2 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 66bd74e068b95..6b6641aef9030 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -734,6 +734,7 @@ def ADD_W : ALU_3R<0x00100000>;
def SUB_W : ALU_3R<0x00110000>;
def ADDI_W : ALU_2RI12<0x02800000, simm12_addlike>;
def ALSL_W : ALU_3RI2<0x00040000, uimm2_plus1>;
+let isReMaterializable = 1 in
def LU12I_W : ALU_1RI20<0x14000000, simm20_lu12iw>;
def SLT : ALU_3R<0x00120000>;
def SLTU : ALU_3R<0x00128000>;
@@ -749,8 +750,10 @@ def XOR : ALU_3R<0x00158000>;
def ANDN : ALU_3R<0x00168000>;
def ORN : ALU_3R<0x00160000>;
def ANDI : ALU_2RI12<0x03400000, uimm12>;
+let isReMaterializable = 1 in {
def ORI : ALU_2RI12<0x03800000, uimm12_ori>;
def XORI : ALU_2RI12<0x03c00000, uimm12>;
+}
def MUL_W : ALU_3R<0x001c0000>;
def MULH_W : ALU_3R<0x001c8000>;
def MULH_WU : ALU_3R<0x001d0000>;
@@ -852,16 +855,21 @@ let Predicates = [IsLA64] in {
// Arithmetic Operation Instructions for 64-bits
def ADD_D : ALU_3R<0x00108000>;
def SUB_D : ALU_3R<0x00118000>;
+// ADDI_D isn't always rematerializable, but isReMaterializable will be used as
+// a hint which is verified in isReallyTriviallyReMaterializable.
+let isReMaterializable = 1 in
def ADDI_D : ALU_2RI12<0x02c00000, simm12_addlike>;
def ADDU16I_D : ALU_2RI16<0x10000000, simm16>;
def ALSL_WU : ALU_3RI2<0x00060000, uimm2_plus1>;
def ALSL_D : ALU_3RI2<0x002c0000, uimm2_plus1>;
let Constraints = "$rd = $dst" in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+ isReMaterializable = 1 in
def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst),
(ins GPR:$rd, simm20_lu32id:$imm20),
"$rd, $imm20">;
}
+let isReMaterializable = 1 in
def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>;
def PCADDU18I : ALU_1RI20<0x1e000000, simm20_pcaddu18i>;
def MUL_D : ALU_3R<0x001d8000>;
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index 18d17751a7719..0a401ebe5f6b2 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -742,12 +742,10 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
; LA32F-NEXT: st.w $a2, $sp, 32 # 4-byte Folded Spill
; LA32F-NEXT: ld.w $a2, $a0, 52
; LA32F-NEXT: st.w $a2, $sp, 28 # 4-byte Folded Spill
-; LA32F-NEXT: ld.w $a2, $a0, 40
-; LA32F-NEXT: st.w $a2, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT: ld.w $s8, $a0, 40
; LA32F-NEXT: ld.w $a2, $a0, 44
-; LA32F-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT: ld.w $a2, $a0, 32
-; LA32F-NEXT: st.w $a2, $sp, 0 # 4-byte Folded Spill
+; LA32F-NEXT: st.w $a2, $sp, 16 # 4-byte Folded Spill
+; LA32F-NEXT: ld.w $s3, $a0, 32
; LA32F-NEXT: ld.w $s4, $a0, 36
; LA32F-NEXT: ld.w $s5, $a0, 24
; LA32F-NEXT: ld.w $s6, $a0, 28
@@ -756,80 +754,77 @@ define void @test_d8(ptr %P, ptr %S) nounwind {
; LA32F-NEXT: ld.w $s7, $a0, 8
; LA32F-NEXT: ld.w $s0, $a0, 12
; LA32F-NEXT: ld.w $a2, $a0, 0
-; LA32F-NEXT: ld.w $a3, $a0, 4
+; LA32F-NEXT: ld.w $a4, $a0, 4
; LA32F-NEXT: move $fp, $a1
-; LA32F-NEXT: lu12i.w $s8, 261888
+; LA32F-NEXT: lu12i.w $a3, 261888
; LA32F-NEXT: move $a0, $a2
-; LA32F-NEXT: move $a1, $a3
+; LA32F-NEXT: move $a1, $a4
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s8
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $sp, 40 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill
-; LA32F-NEXT: lu12i.w $s3, 262144
+; LA32F-NEXT: lu12i.w $a3, 262144
; LA32F-NEXT: move $a0, $s7
; LA32F-NEXT: move $a1, $s0
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s3
+; LA32F-NEXT: move $s0, $a3
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $sp, 24 # 4-byte Folded Spill
; LA32F-NEXT: st.w $a1, $sp, 20 # 4-byte Folded Spill
-; LA32F-NEXT: lu12i.w $s0, 262272
+; LA32F-NEXT: lu12i.w $s7, 262272
; LA32F-NEXT: move $a0, $s1
; LA32F-NEXT: move $a1, $s2
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s0
+; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
-; LA32F-NEXT: st.w $a0, $sp, 8 # 4-byte Folded Spill
-; LA32F-NEXT: st.w $a1, $sp, 4 # 4-byte Folded Spill
-; LA32F-NEXT: lu12i.w $s7, 262400
+; LA32F-NEXT: st.w $a0, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT: move $s2, $a1
+; LA32F-NEXT: lu12i.w $a3, 262400
; LA32F-NEXT: move $a0, $s5
; LA32F-NEXT: move $a1, $s6
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: move $s5, $a0
; LA32F-NEXT: move $s6, $a1
-; LA32F-NEXT: ld.w $a0, $sp, 0 # 4-byte Folded Reload
+; LA32F-NEXT: move $a0, $s3
; LA32F-NEXT: move $a1, $s4
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s8
+; LA32F-NEXT: lu12i.w $a3, 261888
; LA32F-NEXT: bl %plt(__adddf3)
-; LA32F-NEXT: move $s4, $a0
-; LA32F-NEXT: move $s8, $a1
-; LA32F-NEXT: ld.w $a0, $sp, 16 # 4-byte Folded Reload
-; LA32F-NEXT: ld.w $a1, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT: move $s3, $a0
+; LA32F-NEXT: move $s4, $a1
+; LA32F-NEXT: move $a0, $s8
+; LA32F-NEXT: ld.w $a1, $sp, 16 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s3
+; LA32F-NEXT: move $a3, $s0
; LA32F-NEXT: bl %plt(__adddf3)
-; LA32F-NEXT: move $s3, $a0
-; LA32F-NEXT: move $s1, $a1
+; LA32F-NEXT: move $s8, $a0
+; LA32F-NEXT: move $s0, $a1
; LA32F-NEXT: ld.w $a0, $sp, 32 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a1, $sp, 28 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s0
+; LA32F-NEXT: move $a3, $s7
; LA32F-NEXT: bl %plt(__adddf3)
-; LA32F-NEXT: move $s0, $a0
-; LA32F-NEXT: move $s2, $a1
+; LA32F-NEXT: move $s7, $a0
+; LA32F-NEXT: move $s1, $a1
; LA32F-NEXT: ld.w $a0, $sp, 48 # 4-byte Folded Reload
; LA32F-NEXT: ld.w $a1, $sp, 44 # 4-byte Folded Reload
; LA32F-NEXT: move $a2, $zero
-; LA32F-NEXT: move $a3, $s7
+; LA32F-NEXT: lu12i.w $a3, 262400
; LA32F-NEXT: bl %plt(__adddf3)
; LA32F-NEXT: st.w $a0, $fp, 56
; LA32F-NEXT: st.w $a1, $fp, 60
-; LA32F-NEXT: st.w $s0, $fp, 48
-; LA32F-NEXT: st.w $s2, $fp, 52
-; LA32F-NEXT: st.w $s3, $fp, 40
-; LA32F-NEXT: st.w $s1, $fp, 44
-; LA32F-NEXT: st.w $s4, $fp, 32
-; LA32F-NEXT: st.w $s8, $fp, 36
+; LA32F-NEXT: st.w $s7, $fp, 48
+; LA32F-NEXT: st.w $s1, $fp, 52
+; LA32F-NEXT: st.w $s8, $fp, 40
+; LA32F-NEXT: st.w $s0, $fp, 44
+; LA32F-NEXT: st.w $s3, $fp, 32
+; LA32F-NEXT: st.w $s4, $fp, 36
; LA32F-NEXT: st.w $s5, $fp, 24
; LA32F-NEXT: st.w $s6, $fp, 28
-; LA32F-NEXT: ld.w $a0, $sp, 8 # 4-byte Folded Reload
+; LA32F-NEXT: ld.w $a0, $sp, 12 # 4-byte Folded Reload
; LA32F-NEXT: st.w $a0, $fp, 16
-; LA32F-NEXT: ld.w $a0, $sp, 4 # 4-byte Folded Reload
-; LA32F-NEXT: st.w $a0, $fp, 20
+; LA32F-NEXT: st.w $s2, $fp, 20
; LA32F-NEXT: ld.w $a0, $sp, 24 # 4-byte Folded Reload
; LA32F-NEXT: st.w $a0, $fp, 8
; LA32F-NEXT: ld.w $a0, $sp, 20 # 4-byte Folded Reload
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this patch only touch LA32 tests? Do any LA64 tests benefit from it?
Not only LA32 but also LA64 benefits. In a Rust benchmark case, an instruction that generates an operand for a conditional branch can be hoisted by LICM, depending on isTriviallyReMaterializable. (I can't reproduce it in a simple case :( ) See llvm-project/llvm/lib/CodeGen/MachineLICM.cpp, lines 1212 to 1215 in ea32197.
|
No description provided.