Skip to content

Commit d9fc4d6

Browse files
phoebewang authored and DanielCChen committed
[X86][AMX] Fix missing stride register for tileloadd (llvm#110226)
Fixes: llvm#110190
1 parent 9ff3a01 commit d9fc4d6

File tree

3 files changed

+10
-8
lines changed

3 files changed

+10
-8
lines changed

llvm/lib/Target/X86/X86LowerTileCopy.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -140,14 +140,16 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
140140
MachineInstr *NewMI =
141141
addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
142142
.addReg(SrcReg, getKillRegState(SrcMO.isKill()));
143-
MachineOperand &MO = NewMI->getOperand(2);
144-
MO.setReg(GR64Cand ? GR64Cand : X86::RAX);
145-
MO.setIsKill(true);
143+
MachineOperand *MO = &NewMI->getOperand(X86::AddrIndexReg);
144+
MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
146145
// tileloadd (%sp, %idx), %tmm
147146
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
148147
#undef GET_EGPR_IF_ENABLED
149148
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
150149
TileSS);
150+
MO = &NewMI->getOperand(1 + X86::AddrIndexReg);
151+
MO->setReg(GR64Cand ? GR64Cand : X86::RAX);
152+
MO->setIsKill(true);
151153
if (!GR64Cand) {
152154
// restore %rax
153155
// mov (%sp) %rax

llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ define dso_local void @test1(ptr%buf) nounwind {
4545
; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm0
4646
; CHECK-NEXT: tileloadd (%rbx,%r15), %tmm1
4747
; CHECK-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
48-
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
48+
; CHECK-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
4949
; CHECK-NEXT: tdpbssd %tmm1, %tmm0, %tmm2
5050
; CHECK-NEXT: tilestored %tmm2, (%rbx,%r15)
5151
; CHECK-NEXT: incl %r14d
@@ -109,8 +109,8 @@ define dso_local void @test1(ptr%buf) nounwind {
109109
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
110110
; EGPR-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
111111
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
112-
; EGPR-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
113-
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
112+
; EGPR-NEXT: tileloadd 1024(%rsp,%rax), %tmm2 # 1024-byte Folded Reload
113+
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x04,0x00,0x04,0x00,0x00]
114114
; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
115115
; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
116116
; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]

llvm/test/CodeGen/X86/AMX/amx-tile-basic.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) nounwind {
148148
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
149149
; CHECK-NEXT: movabsq $64, %rax
150150
; CHECK-NEXT: tilestored %tmm0, 3072(%rsp,%rax) # 1024-byte Folded Spill
151-
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
151+
; CHECK-NEXT: tileloadd 3072(%rsp,%rax), %tmm1 # 1024-byte Folded Reload
152152
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
153153
; CHECK-NEXT: jmp .LBB1_4
154154
%4 = shl i32 %2, 4
@@ -212,7 +212,7 @@ define void @multi_use() nounwind {
212212
; CHECK-NEXT: tilezero %tmm0
213213
; CHECK-NEXT: movabsq $64, %rbp
214214
; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
215-
; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
215+
; CHECK-NEXT: tileloadd 896(%rsp,%rbp), %tmm1 # 1024-byte Folded Reload
216216
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm1
217217
; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm0
218218
; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70

0 commit comments

Comments
 (0)