-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86][AMX] Fix missing stride register for tileloadd #110226
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) ChangesFixes: #110190 Full diff: https://github.com/llvm/llvm-project/pull/110226.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index 613722b398f446..1184460acc4aff 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -140,14 +140,16 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *NewMI =
addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
.addReg(SrcReg, getKillRegState(SrcMO.isKill()));
- MachineOperand &MO = NewMI->getOperand(2);
- MO.setReg(GR64Cand ? GR64Cand : X86::RAX);
- MO.setIsKill(true);
+ MachineOperand *MO = &NewMI->getOperand(2);
+ MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
// tileloadd (%sp, %idx), %tmm
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
#undef GET_EGPR_IF_ENABLED
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
TileSS);
+ MO = &NewMI->getOperand(3);
+ MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
+ MO->setIsKill(true);
if (!GR64Cand) {
// restore %rax
// mov (%sp) %rax
diff --git a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
index 15e7136f4a5030..fbebb955f8d976 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll
@@ -148,7 +148,7 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movabsq $64, %rax
; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill
-; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
+; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: jmp .LBB1_4
%4 = shl i32 %2, 4
@@ -212,7 +212,7 @@ define void @multi_use() nounwind {
; CHECK-NEXT: tilezero %tmm0
; CHECK-NEXT: movabsq $64, %rbp
; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
-; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
+; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm1
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm0
; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70
|
// tileloadd (%sp, %idx), %tmm | ||
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); | ||
#undef GET_EGPR_IF_ENABLED | ||
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg), | ||
TileSS); | ||
MO = &NewMI->getOperand(3); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it better to use "1 + X86::AddrIndexReg" instead of "3" as in line X86InstrInfo.cpp#L4768?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, done.
Ping? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SGTM but I know almost nothing about AMX
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fixes: #110190