-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[X86] ICMP EQ/NE MIN_SIGNED_INT - avoid immediate argument by using NEG + SETO/SETNO #94948
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: For i64 this avoids loading a 64-bit value into a register; for smaller registers this just avoids an immediate operand. I've let this fold run for i8+i16 as well as i32+i64, despite there being less of a codesize saving. Fixes #67709 Full diff: https://github.com/llvm/llvm-project/pull/94948.diff 7 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2aec14e93d082..cb51952285e71 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23816,6 +23816,18 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
}
}
+ // Look for X == INT_MIN or X != INT_MIN. We can use NEG and test for
+ // overflow.
+ if (isMinSignedConstant(Op1)) {
+ EVT VT = Op0.getValueType();
+ SDVTList CmpVTs = DAG.getVTList(VT, MVT::i32);
+ X86::CondCode CondCode = CC == ISD::SETEQ ? X86::COND_O : X86::COND_NO;
+ X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
+ SDValue Neg =
+ DAG.getNode(X86ISD::SUB, dl, CmpVTs, DAG.getConstant(0, dl, VT), Op0);
+ return SDValue(Neg.getNode(), 1);
+ }
+
// Try to use the carry flag from the add in place of an separate CMP for:
// (seteq (add X, -1), -1). Similar for setne.
if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
diff --git a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index feaa38a7600a2..00ffea903079e 100644
--- a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -8,8 +8,8 @@ define i16 @test(ptr %tmp179) nounwind {
; CHECK-NEXT: movzwl (%eax), %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $64512, %ecx ## imm = 0xFC00
-; CHECK-NEXT: cmpl $32768, %ecx ## imm = 0x8000
-; CHECK-NEXT: jne LBB0_2
+; CHECK-NEXT: negw %cx
+; CHECK-NEXT: jno LBB0_2
; CHECK-NEXT: ## %bb.1: ## %bb189
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 49797fbefa597..5c5487815b336 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -58,8 +58,8 @@ define i32 @combine_sdiv_by_minsigned(i32 %x) {
; CHECK-LABEL: combine_sdiv_by_minsigned:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: negl %edi
+; CHECK-NEXT: seto %al
; CHECK-NEXT: retq
%1 = sdiv i32 %x, -2147483648
ret i32 %1
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 2046d790cc57e..780341c4b6821 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -936,15 +936,16 @@ entry:
define i1 @is_minus_zero_f(float %x) {
; CHECK-32-LABEL: is_minus_zero_f:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000
-; CHECK-32-NEXT: sete %al
+; CHECK-32-NEXT: xorl %eax, %eax
+; CHECK-32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: seto %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: is_minus_zero_f:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
-; CHECK-64-NEXT: sete %al
+; CHECK-64-NEXT: negl %eax
+; CHECK-64-NEXT: seto %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 32) ; 0x20 = "-zero"
@@ -954,15 +955,16 @@ entry:
define i1 @not_is_minus_zero_f(float %x) {
; CHECK-32-LABEL: not_is_minus_zero_f:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000
-; CHECK-32-NEXT: setne %al
+; CHECK-32-NEXT: xorl %eax, %eax
+; CHECK-32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; CHECK-32-NEXT: setno %al
; CHECK-32-NEXT: retl
;
; CHECK-64-LABEL: not_is_minus_zero_f:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: movd %xmm0, %eax
-; CHECK-64-NEXT: cmpl $-2147483648, %eax # imm = 0x80000000
-; CHECK-64-NEXT: setne %al
+; CHECK-64-NEXT: negl %eax
+; CHECK-64-NEXT: setno %al
; CHECK-64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 991) ; ~0x20 = ~"-zero"
diff --git a/llvm/test/CodeGen/X86/lsr-overflow.ll b/llvm/test/CodeGen/X86/lsr-overflow.ll
index 09c1c07ef3de0..79440c282be75 100644
--- a/llvm/test/CodeGen/X86/lsr-overflow.ll
+++ b/llvm/test/CodeGen/X86/lsr-overflow.ll
@@ -4,9 +4,9 @@
; The comparison uses the pre-inc value, which could lead LSR to
; try to compute -INT64_MIN.
-; CHECK: movabsq $-9223372036854775808, %rax
-; CHECK: cmpq %rax,
-; CHECK: sete %al
+; CHECK-NOT: movabsq $-9223372036854775808, %rax
+; CHECK: negq %r
+; CHECK-NEXT: seto %al
declare i64 @bar()
diff --git a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
index 254b8fe3fc6e3..5a15ee36c0726 100644
--- a/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
+++ b/llvm/test/CodeGen/X86/shrink-compare-pgso.ll
@@ -265,8 +265,8 @@ if.end:
define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind !prof !14 {
; CHECK-LABEL: test_sext_i8_icmp_neg128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cmpb $-128, %dil
-; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: jo bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll
index 840167ff9f4a0..1a61451c26a03 100644
--- a/llvm/test/CodeGen/X86/shrink-compare.ll
+++ b/llvm/test/CodeGen/X86/shrink-compare.ll
@@ -265,8 +265,8 @@ if.end:
define dso_local void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize {
; CHECK-LABEL: test_sext_i8_icmp_neg128:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: cmpb $-128, %dil
-; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: negb %dil
+; CHECK-NEXT: jo bar # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: retq
entry:
|
What about cmp x, 1(/dec x) + seto/setno? neg always clobbers the operand, but it might be beneficial to keep it? |
Yes, we could use "cmp %x, 1" for cases where x has additional uses. What benefits does dec have over neg? I've always used neg, but that's mainly because it's nearly always been for abs |
None. My thinking was just that cmp x,1 for the last use of x could be combined later into dec when code size is important (-Os?) whereas for neg the distinction needs to be made here (sub 0,x vs sub x,1). |
Additional multi-use tests for #94948
llvm/test/CodeGen/X86/is_fpclass.ll
Outdated
; CHECK-32-NEXT: cmpl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 | ||
; CHECK-32-NEXT: setne %al | ||
; CHECK-32-NEXT: xorl %eax, %eax | ||
; CHECK-32-NEXT: cmpl {{[0-9]+}}(%esp), %eax |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can be a negl {{[0-9]+}}(%esp), %eax
when NDD is available.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll add test coverage
|
@@ -783,29 +783,28 @@ define i16 @test_minsigned_i16(i16 %a0, i16 %a1) nounwind { | |||
define i32 @test_minsigned_i32(i32 %a0, i32 %a1) nounwind { | |||
; X64-LABEL: test_minsigned_i32: | |||
; X64: # %bb.0: | |||
; X64-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000 | |||
; X64-NEXT: jne .LBB19_1 | |||
; X64-NEXT: movl %edi, %eax |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this will increase register pressure. Should we check that the operand has only one use?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've been testing with hasOneUse but it isn't great as its purely down to final execution order - the abs tests can reuse the neg flag results in most cases. I'm going to see if X86FlagsCopyLowering can help us
We always save at least 1 byte in the NEG vs CMP-imm encoding, so as long as we don't introduce any extra register moves we're still better off. Would people prefer if I just limited this to i32/i64 for now? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Additional multi-use tests for llvm#94948
For i64 this avoids loading a 64-bit value into register, for smaller registers this just avoids an immediate operand.
I've let this fold run for i8+i16 as well as i32+i64, despite there being less of a codesize saving.
Fixes #67709