Skip to content

Commit fbb9d49

Browse files
authored
[X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE (#123267)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266
1 parent d7e48fb commit fbb9d49

13 files changed

+758
-62
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
578578
unsigned Opc;
579579
switch (Opcode) {
580580
case X86::PTILELOADDRSV:
581-
Opc = X86::TILELOADDRS;
581+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
582582
break;
583583
case X86::PTILELOADDRST1V:
584-
Opc = X86::TILELOADDRST1;
584+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
585585
break;
586586
case X86::PTILELOADDV:
587587
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
737737
unsigned Opc;
738738
switch (Opcode) {
739739
case X86::PT2RPNTLVWZ0V:
740-
Opc = X86::T2RPNTLVWZ0;
740+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
741741
break;
742742
case X86::PT2RPNTLVWZ0T1V:
743-
Opc = X86::T2RPNTLVWZ0T1;
743+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
744744
break;
745745
case X86::PT2RPNTLVWZ1V:
746-
Opc = X86::T2RPNTLVWZ1;
746+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
747747
break;
748748
case X86::PT2RPNTLVWZ1T1V:
749-
Opc = X86::T2RPNTLVWZ1T1;
749+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
750750
break;
751751
case X86::PT2RPNTLVWZ0RSV:
752-
Opc = X86::T2RPNTLVWZ0RS;
752+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
753753
break;
754754
case X86::PT2RPNTLVWZ0RST1V:
755-
Opc = X86::T2RPNTLVWZ0RST1;
755+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
756756
break;
757757
case X86::PT2RPNTLVWZ1RSV:
758-
Opc = X86::T2RPNTLVWZ1RS;
758+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
759759
break;
760760
case X86::PT2RPNTLVWZ1RST1V:
761-
Opc = X86::T2RPNTLVWZ1RST1;
761+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
762762
break;
763763
default:
764764
llvm_unreachable("Impossible Opcode!");

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3780037800
case X86::PTILESTORED:
3780137801
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
3780237802
break;
37803-
#undef GET_EGPR_IF_ENABLED
3780437803
case X86::PTILELOADDRS:
37805-
Opc = X86::TILELOADDRS;
37804+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
3780637805
break;
3780737806
case X86::PTILELOADDRST1:
37808-
Opc = X86::TILELOADDRST1;
37807+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
3780937808
break;
3781037809
}
37810+
#undef GET_EGPR_IF_ENABLED
3781137811

3781237812
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
3781337813
unsigned CurOp = 0;
@@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3783837838
case X86::PT2RPNTLVWZ1RST1: {
3783937839
const DebugLoc &DL = MI.getDebugLoc();
3784037840
unsigned Opc;
37841+
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
3784137842
switch (MI.getOpcode()) {
3784237843
default:
3784337844
llvm_unreachable("Unexpected instruction!");
3784437845
case X86::PT2RPNTLVWZ0:
37845-
Opc = X86::T2RPNTLVWZ0;
37846+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
3784637847
break;
3784737848
case X86::PT2RPNTLVWZ0T1:
37848-
Opc = X86::T2RPNTLVWZ0T1;
37849+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
3784937850
break;
3785037851
case X86::PT2RPNTLVWZ1:
37851-
Opc = X86::T2RPNTLVWZ1;
37852+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
3785237853
break;
3785337854
case X86::PT2RPNTLVWZ1T1:
37854-
Opc = X86::T2RPNTLVWZ1T1;
37855+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
3785537856
break;
3785637857
case X86::PT2RPNTLVWZ0RS:
37857-
Opc = X86::T2RPNTLVWZ0RS;
37858+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
3785837859
break;
3785937860
case X86::PT2RPNTLVWZ0RST1:
37860-
Opc = X86::T2RPNTLVWZ0RST1;
37861+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
3786137862
break;
3786237863
case X86::PT2RPNTLVWZ1RS:
37863-
Opc = X86::T2RPNTLVWZ1RS;
37864+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
3786437865
break;
3786537866
case X86::PT2RPNTLVWZ1RST1:
37866-
Opc = X86::T2RPNTLVWZ1RST1;
37867+
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
3786737868
break;
3786837869
}
37870+
#undef GET_EGPR_IF_ENABLED
3786937871
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
3787037872
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
3787137873

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 35 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -345,26 +345,33 @@ let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSys
345345
def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
346346
}
347347

348-
let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
349-
let SchedRW = [WriteSystem] in {
350-
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
351-
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
352-
[]>, VEX, WIG, T8,PS;
348+
multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
349+
def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
350+
"t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
351+
def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
352+
"t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
353+
def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
354+
"t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
355+
def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
356+
"t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
357+
}
353358

354-
def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
355-
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
356-
[]>, VEX, T8,PS;
359+
let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
360+
defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;
357361

358-
def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
359-
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
360-
[]>, VEX, T8,PD;
362+
let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
363+
defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;
361364

362-
def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
363-
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
364-
[]>, VEX, T8,PD;
365+
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
366+
defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;
365367

368+
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
369+
defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;
370+
371+
let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
372+
let SchedRW = [WriteSystem] in {
366373
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
367-
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
374+
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
368375
let isPseudo = true in {
369376
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
370377
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -491,22 +498,6 @@ let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [Write
491498
}
492499

493500
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
494-
def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
495-
(ins sibmem:$src1),
496-
"t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
497-
[]>, VEX, T_MAP5;
498-
def T2RPNTLVWZ0RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
499-
(ins sibmem:$src1),
500-
"t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
501-
[]>, VEX, T_MAP5;
502-
def T2RPNTLVWZ1RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
503-
(ins sibmem:$src1),
504-
"t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
505-
[]>, VEX, T_MAP5, PD;
506-
def T2RPNTLVWZ1RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
507-
(ins sibmem:$src1),
508-
"t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
509-
[]>, VEX, T_MAP5, PD;
510501
let isPseudo = true in {
511502
def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
512503
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -529,16 +520,20 @@ let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSy
529520
}
530521
} // HasAMXMOVRS, HasAMXTRANSPOSE
531522

532-
let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
533-
def TILELOADDRS : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
534-
(ins sibmem:$src1),
535-
"tileloaddrs\t{$src1, $dst|$dst, $src1}",
536-
[]>, VEX, T8, XD;
537-
def TILELOADDRST1 : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
538-
(ins sibmem:$src1),
539-
"tileloaddrst1\t{$src1, $dst|$dst, $src1}",
540-
[]>, VEX, T8, PD;
523+
multiclass TILELOADDRS_Base<string suffix> {
524+
def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
525+
"tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
526+
def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
527+
"tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
528+
}
529+
530+
let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
531+
defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;
541532

533+
let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
534+
defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;
535+
536+
let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
542537
let isPseudo = true, mayLoad = 1 in {
543538
def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
544539
GR16:$src2,

llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
3+
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
34

45
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
56
; CHECK-LABEL: test_amx_internal:
@@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
3536
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
3637
; CHECK-NEXT: tilerelease
3738
; CHECK-NEXT: retq
39+
;
40+
; EGPR-LABEL: test_amx_internal:
41+
; EGPR: # %bb.0: # %entry
42+
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
43+
; EGPR-NEXT: .cfi_def_cfa_offset 16
44+
; EGPR-NEXT: .cfi_offset %rbp, -16
45+
; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
46+
; EGPR-NEXT: .cfi_def_cfa_register %rbp
47+
; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
48+
; EGPR-NEXT: # imm = 0xFC00
49+
; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
50+
; EGPR-NEXT: # imm = 0xC00
51+
; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
52+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
53+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
54+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
55+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
56+
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
57+
; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
58+
; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
59+
; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
60+
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
61+
; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
62+
; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
63+
; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
64+
; EGPR-NEXT: # implicit-def: $al
65+
; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
66+
; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
67+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
68+
; EGPR-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
69+
; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
70+
; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
71+
; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
72+
; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
73+
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
74+
; EGPR-NEXT: .cfi_def_cfa %rsp, 8
75+
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
76+
; EGPR-NEXT: retq # encoding: [0xc3]
3877
entry:
3978
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
4079
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
4887
; CHECK-NEXT: movl $32, %eax
4988
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
5089
; CHECK-NEXT: retq
90+
;
91+
; EGPR-LABEL: test_amx_old:
92+
; EGPR: # %bb.0: # %entry
93+
; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
94+
; EGPR-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
95+
; EGPR-NEXT: retq # encoding: [0xc3]
5196
entry:
5297
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
5398
ret void
@@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
88133
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
89134
; CHECK-NEXT: tilerelease
90135
; CHECK-NEXT: retq
136+
;
137+
; EGPR-LABEL: test_amx_t1_internal:
138+
; EGPR: # %bb.0: # %entry
139+
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
140+
; EGPR-NEXT: .cfi_def_cfa_offset 16
141+
; EGPR-NEXT: .cfi_offset %rbp, -16
142+
; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
143+
; EGPR-NEXT: .cfi_def_cfa_register %rbp
144+
; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
145+
; EGPR-NEXT: # imm = 0xFC00
146+
; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
147+
; EGPR-NEXT: # imm = 0xC00
148+
; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
149+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
150+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
151+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
152+
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
153+
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
154+
; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
155+
; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
156+
; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
157+
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
158+
; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
159+
; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
160+
; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
161+
; EGPR-NEXT: # implicit-def: $al
162+
; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
163+
; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
164+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
165+
; EGPR-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
166+
; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
167+
; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
168+
; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
169+
; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
170+
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
171+
; EGPR-NEXT: .cfi_def_cfa %rsp, 8
172+
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
173+
; EGPR-NEXT: retq # encoding: [0xc3]
91174
entry:
92175
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
93176
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
101184
; CHECK-NEXT: movl $32, %eax
102185
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
103186
; CHECK-NEXT: retq
187+
;
188+
; EGPR-LABEL: test_amx_t1_old:
189+
; EGPR: # %bb.0: # %entry
190+
; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
191+
; EGPR-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
192+
; EGPR-NEXT: retq # encoding: [0xc3]
104193
entry:
105194
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
106195
ret void

0 commit comments

Comments
 (0)