-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86] Put R20/R21/R28/R29 later in GR64 list #120510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
These registers require an extra byte to encode in certain memory forms, so putting them later in the list will reduce code size when EGPR is enabled. This also aligns the order with the GR8, GR16 and GR32 lists. Example: movq (%r20), %r11 # encoding: [0xd5,0x1c,0x8b,0x1c,0x24] movq (%r22), %r11 # encoding: [0xd5,0x1c,0x8b,0x1e]
@llvm/pr-subscribers-backend-x86 Author: Feng Zou (fzou1) Changes: These registers require an extra byte to encode in certain memory forms, so putting them later in the list will reduce code size when EGPR is enabled. This also aligns the order with the GR8, GR16 and GR32 lists. Example:
Patch is 91.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120510.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index f93f920b6aeca3..d218ad0aefc8c5 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -550,9 +550,9 @@ def SSP : X86Reg<"ssp", 0>;
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
- R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
- R30B, R31B, R14B, R15B, R12B, R13B)> {
+ R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R22B,
+ R23B, R24B, R25B, R26B, R27B, R30B, R31B, R14B,
+ R15B, R12B, R13B, R20B, R21B, R28B, R29B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -567,9 +567,9 @@ def GRH8 : RegisterClass<"X86", [i8], 8,
R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
- R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W,
- R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W,
- R15W, R12W, R13W)>;
+ R11W, R16W, R17W, R18W, R19W, R22W, R23W, R24W,
+ R25W, R26W, R27W, R30W, R31W, R14W, R15W, R12W,
+ R13W, R20W, R21W, R28W, R29W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
@@ -579,9 +579,9 @@ def GRH16 : RegisterClass<"X86", [i16], 16,
R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
- R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D,
- R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
- R14D, R15D, R12D, R13D)>;
+ R10D, R11D, R16D, R17D, R18D, R19D, R22D, R23D,
+ R24D, R25D, R26D, R27D, R30D, R31D, R14D, R15D,
+ R12D, R13D, R20D, R21D, R28D, R29D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -590,8 +590,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
- R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+ R18, R19, R22, R23, R24, R25, R26, R27, R30, R31, RBX,
+ R14, R15, R12, R13, R20, R21, R28, R29, RBP, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implict input registers.
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index a4d15a1b21d6b4..a29a92176f4323 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -13,104 +13,104 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: pushq %rbx
; EGPR-NEXT: subq $104, %rsp
; EGPR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq %rdi, %r24
+; EGPR-NEXT: movq %rdi, %r26
; EGPR-NEXT: movq (%rdi), %r13
; EGPR-NEXT: movq 8(%rdi), %r18
-; EGPR-NEXT: movq 24(%rdi), %r29
+; EGPR-NEXT: movq 24(%rdi), %r21
; EGPR-NEXT: movq 16(%rdi), %r17
; EGPR-NEXT: movq 40(%rdi), %rdi
-; EGPR-NEXT: movq 32(%r24), %r10
-; EGPR-NEXT: movq 56(%r24), %r15
-; EGPR-NEXT: movq 48(%r24), %r12
+; EGPR-NEXT: movq 32(%r26), %r10
+; EGPR-NEXT: movq 56(%r26), %r15
+; EGPR-NEXT: movq 48(%r26), %r12
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq 24(%rsi), %r23
+; EGPR-NEXT: movq 24(%rsi), %r25
; EGPR-NEXT: movq 16(%rsi), %r11
-; EGPR-NEXT: movq (%rsi), %r27
+; EGPR-NEXT: movq (%rsi), %r31
; EGPR-NEXT: movq 8(%rsi), %r14
; EGPR-NEXT: movq %r12, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r19
; EGPR-NEXT: movq %r15, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r12, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r20
+; EGPR-NEXT: movq %rdx, %r22
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r20
+; EGPR-NEXT: adcq %r9, %r22
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
-; EGPR-NEXT: addq %r20, %r16
+; EGPR-NEXT: addq %r22, %r16
; EGPR-NEXT: adcq %rcx, %r9
; EGPR-NEXT: movq %r10, %rax
-; EGPR-NEXT: mulq %r27
-; EGPR-NEXT: movq %rdx, %r20
-; EGPR-NEXT: movq %rax, %r25
+; EGPR-NEXT: mulq %r31
+; EGPR-NEXT: movq %rdx, %r22
+; EGPR-NEXT: movq %rax, %r27
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r27
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r20, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: mulq %r31
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r22, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r10, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r20
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: addq %r22, %r28
-; EGPR-NEXT: adcq %r21, %r20
+; EGPR-NEXT: movq %rdx, %r22
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: addq %r24, %r20
+; EGPR-NEXT: adcq %r23, %r22
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %rdi, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r20, %r22
-; EGPR-NEXT: adcq %rcx, %r21
-; EGPR-NEXT: addq %r19, %r22
-; EGPR-NEXT: adcq %r8, %r21
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r22, %r24
+; EGPR-NEXT: adcq %rcx, %r23
+; EGPR-NEXT: addq %r19, %r24
+; EGPR-NEXT: adcq %r8, %r23
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r10, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: movq %rax, %r28
; EGPR-NEXT: movq %rdi, %rax
; EGPR-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r19
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r8, %r20
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r8, %r22
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %r10, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %rbx
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r20, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r22, %r29
; EGPR-NEXT: adcq %r19, %rbx
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r23
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %rbx, %r8
-; EGPR-NEXT: adcq %rcx, %r26
-; EGPR-NEXT: addq %r22, %r30
-; EGPR-NEXT: adcq %r21, %r31
+; EGPR-NEXT: adcq %rcx, %r30
+; EGPR-NEXT: addq %r24, %r28
+; EGPR-NEXT: adcq %r23, %r29
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
+; EGPR-NEXT: adcq %r9, %r30
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -122,34 +122,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r16
-; EGPR-NEXT: movq %rax, %r21
-; EGPR-NEXT: addq %r9, %r21
+; EGPR-NEXT: movq %rax, %r23
+; EGPR-NEXT: addq %r9, %r23
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: movq %r12, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %rdi
-; EGPR-NEXT: addq %r21, %rdi
+; EGPR-NEXT: addq %r23, %rdi
; EGPR-NEXT: adcq %r16, %r9
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %r10d
; EGPR-NEXT: movq %r15, %rax
-; EGPR-NEXT: mulq %r23
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r9, %r22
-; EGPR-NEXT: adcq %r10, %r21
+; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r9, %r24
+; EGPR-NEXT: adcq %r10, %r23
; EGPR-NEXT: addq %r8, %rsi
; EGPR-NEXT: movq %rsi, %r19
-; EGPR-NEXT: adcq %r26, %rdi
-; EGPR-NEXT: adcq %rcx, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: adcq %r30, %rdi
+; EGPR-NEXT: adcq %rcx, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rbx
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
@@ -157,12 +157,12 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %r14, %rsi
; EGPR-NEXT: movq %rdx, %r9
@@ -170,11 +170,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq %rcx, %r9
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r14
; EGPR-NEXT: movq %rax, %r15
; EGPR-NEXT: addq %r8, %r15
@@ -195,40 +195,40 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %r8
; EGPR-NEXT: addq %rbx, %r15
-; EGPR-NEXT: adcq %r26, %r8
+; EGPR-NEXT: adcq %r30, %r8
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %r11
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %rsi
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %rbx
; EGPR-NEXT: movq %rax, %r14
-; EGPR-NEXT: addq %r26, %r14
+; EGPR-NEXT: addq %r30, %r14
; EGPR-NEXT: adcq $0, %rbx
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r12
; EGPR-NEXT: addq %r14, %rax
; EGPR-NEXT: movq %rax, %r10
; EGPR-NEXT: adcq %rbx, %r12
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r14
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r12, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r12, %r30
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %r14
; EGPR-NEXT: addq %r15, %rsi
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq %r8, %r10
; EGPR-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: adcq $0, %r14
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r14
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r17, %rax
@@ -236,48 +236,48 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rbx
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r15
; EGPR-NEXT: addq %r16, %r15
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r9b
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r12
; EGPR-NEXT: movq %rax, %rbp
; EGPR-NEXT: addq %r8, %rbp
; EGPR-NEXT: movzbl %r9b, %eax
; EGPR-NEXT: adcq %rax, %r12
-; EGPR-NEXT: addq %r26, %rbx
+; EGPR-NEXT: addq %r30, %rbx
; EGPR-NEXT: adcq %r14, %r15
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %rbp
; EGPR-NEXT: adcq $0, %r12
-; EGPR-NEXT: addq %r25, %rbx
+; EGPR-NEXT: addq %r27, %rbx
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; EGPR-NEXT: movq 32(%rsi), %r25
-; EGPR-NEXT: adcq %r28, %r15
-; EGPR-NEXT: adcq %r30, %rbp
-; EGPR-NEXT: adcq %r31, %r12
+; EGPR-NEXT: movq 32(%rsi), %r27
+; EGPR-NEXT: adcq %r20, %r15
+; EGPR-NEXT: adcq %r28, %rbp
+; EGPR-NEXT: adcq %r29, %r12
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq $0, %rdi
-; EGPR-NEXT: adcq $0, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: adcq $0, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
@@ -286,11 +286,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r10b
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
@@ -298,138 +298,138 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movzbl %r10b, %eax
; EGPR-NEXT: adcq %rax, %r9
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r19
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r25
-; EGPR-NEXT: movq %rdx, %r30
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r8, %r31
-; EGPR-NEXT: adcq $0, %r30
+; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %rdx, %r28
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r8, %r29
+; EGPR-NEXT: adcq $0, %r28
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r31, %r20
-; EGPR-NEXT: adcq %r30, %r8
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r29, %r22
+; EGPR-NEXT: adcq %r28, %r8
; EGPR-NEXT: setb %r10b
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %rcx
-; EGPR-NEXT: movq %rdx, %r30
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r8, %r31
+; EGPR-NEXT: movq %rdx, %r28
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r8, %r29
; EGPR-NEXT: movzbl %r10b, %eax
-; EGPR-NEXT: adcq %rax, %r30
-; EGPR-NEXT: addq %r28, %r31
-; EGPR-NEXT: adcq %r26, %r30
+; EGPR-NEXT: adcq %rax, %r28
+; EGPR-NEXT: addq %r20, %r29
+; EGPR-NEXT: adcq %r30, %r28
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq 48(%rsi), %r28
+; EGPR-NEXT: movq 48(%rsi), %r20
; EGPR-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r11
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: mulq %r28
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: mulq %r20
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r14
; EGPR-NEXT: addq %r8, %r14
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: movq 56(%rsi), %r10
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r13
; EGPR-NEXT: addq %r14, %rax
; EGPR-NEXT: movq %rax, %r14
-; EGPR-NEXT: adcq %r26, %r13
+; EGPR-NEXT: adcq %r30, %r13
; EGPR-NEXT: setb %sil
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %r10
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r13, %r8
; EGPR-NEXT: movzbl %sil, %eax
-; EGPR-NEXT: adcq %rax, %r26
-; EGPR-NEXT: addq %r31, %r11
-; EGPR-NEXT: adcq %r30, %r14
+; EGPR-NEXT: adcq %rax, %r30
+; EGPR-NEXT: addq %r29, %r11
+; EGPR-NEXT: adcq %r28, %r14
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
+; EGPR-NEXT: adcq %r9, %r30
; EGPR-NEXT: setb %r18b
; EGPR-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r9
-; EGPR-NEXT: movq %rax, %r30
-; EGPR-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: movq %rax, %r28
+; EGPR-NEXT: movq %r21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r16
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r9, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r9, %r29
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r17
-; EGPR-NEXT: addq %r31, %r17
+; EGPR-NEXT: addq %r29, %r17
; EGPR-NEXT: adcq %r16, %r9
; EGPR-NEXT: setb %r16b
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r13
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r9, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r9, %r29
; EGPR-NEXT: movzbl %r16b, %eax
; EGPR-NEXT: adcq %rax, %r13
-; EGPR-NEXT: addq %r8, %r30
-; EGPR-NEXT: adcq %r26, %r17
+; EGPR-NEXT: addq %r8, %r28
+; EGPR-NEXT: adcq %r30, %r17
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r31
+; EGPR-NEXT: adcq %rax, %r29
; EGPR-NEXT: adcq $0, %r13
; EGPR-NEXT: addq %rbx, %r19
; EGPR-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
R30B, R31B, R14B, R15B, R12B, R13B)> {
R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R22B,
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update the comment above which refers to R12/R13 being allocated last
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated. Thank you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
These registers require an extra byte to encode in certain memory forms, so putting them later in the list will reduce code size when EGPR is enabled. This also aligns the order with the GR8, GR16 and GR32 lists. Example: