Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 0bca0d9

Browse files
committed
[X86] Use a shift plus an lea for multiplying by a constant that is a power of 2 plus 2/4/8.
The LEA allows us to combine an add and the multiply by 2/4/8 together so we just need a shift for the larger power of 2. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337875 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d277be2 commit 0bca0d9

File tree

4 files changed

+172
-22
lines changed

4 files changed

+172
-22
lines changed

lib/Target/X86/X86ISelLowering.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -33749,6 +33749,24 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
3374933749
return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
3375033750
combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
3375133751
}
33752+
33753+
// Another trick. If this is a power of 2 + 2/4/8, we can use a shift followed
33754+
// by a single LEA.
33755+
// First check if this is a sum of two powers of 2 because that's easy. Then
33756+
// count how many trailing zeros there are below the lowest set bit.
33757+
// TODO: We can do this even without LEA at a cost of two shifts and an add.
33758+
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
33759+
unsigned ScaleShift = countTrailingZeros(MulAmt);
33760+
if (ScaleShift >= 1 && ScaleShift < 4) {
33761+
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
33762+
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
33763+
DAG.getConstant(ShiftAmt, DL, MVT::i8));
33764+
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
33765+
DAG.getConstant(ScaleShift, DL, MVT::i8));
33766+
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
33767+
}
33768+
}
33769+
3375233770
return SDValue();
3375333771
}
3375433772

test/CodeGen/X86/mul-constant-i16.ll

+27-7
Original file line numberDiff line numberDiff line change
@@ -705,11 +705,10 @@ define i16 @test_mul_by_62(i16 %x) {
705705
define i16 @test_mul_by_66(i16 %x) {
706706
; X86-LABEL: test_mul_by_66:
707707
; X86: # %bb.0:
708-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
709-
; X86-NEXT: movl %ecx, %eax
710-
; X86-NEXT: shll $6, %eax
711-
; X86-NEXT: addl %ecx, %eax
712-
; X86-NEXT: addl %ecx, %eax
708+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
709+
; X86-NEXT: movl %eax, %ecx
710+
; X86-NEXT: shll $6, %ecx
711+
; X86-NEXT: leal (%ecx,%eax,2), %eax
713712
; X86-NEXT: # kill: def $ax killed $ax killed $eax
714713
; X86-NEXT: retl
715714
;
@@ -718,8 +717,7 @@ define i16 @test_mul_by_66(i16 %x) {
718717
; X64-NEXT: # kill: def $edi killed $edi def $rdi
719718
; X64-NEXT: movl %edi, %eax
720719
; X64-NEXT: shll $6, %eax
721-
; X64-NEXT: leal (%rax,%rdi), %eax
722-
; X64-NEXT: addl %edi, %eax
720+
; X64-NEXT: leal (%rax,%rdi,2), %eax
723721
; X64-NEXT: # kill: def $ax killed $ax killed $eax
724722
; X64-NEXT: retq
725723
%mul = mul nsw i16 %x, 66
@@ -746,6 +744,28 @@ define i16 @test_mul_by_73(i16 %x) {
746744
ret i16 %mul
747745
}
748746

747+
define i16 @test_mul_by_520(i16 %x) {
748+
; X86-LABEL: test_mul_by_520:
749+
; X86: # %bb.0:
750+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
751+
; X86-NEXT: movl %eax, %ecx
752+
; X86-NEXT: shll $9, %ecx
753+
; X86-NEXT: leal (%ecx,%eax,8), %eax
754+
; X86-NEXT: # kill: def $ax killed $ax killed $eax
755+
; X86-NEXT: retl
756+
;
757+
; X64-LABEL: test_mul_by_520:
758+
; X64: # %bb.0:
759+
; X64-NEXT: # kill: def $edi killed $edi def $rdi
760+
; X64-NEXT: movl %edi, %eax
761+
; X64-NEXT: shll $9, %eax
762+
; X64-NEXT: leal (%rax,%rdi,8), %eax
763+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
764+
; X64-NEXT: retq
765+
%mul = mul nsw i16 %x, 520
766+
ret i16 %mul
767+
}
768+
749769
; (x*9+42)*(x*5+2)
750770
define i16 @test_mul_spec(i16 %x) nounwind {
751771
; X86-LABEL: test_mul_spec:

test/CodeGen/X86/mul-constant-i32.ll

+63-9
Original file line numberDiff line numberDiff line change
@@ -1836,29 +1836,26 @@ define i32 @test_mul_by_62(i32 %x) {
18361836
define i32 @test_mul_by_66(i32 %x) {
18371837
; X86-LABEL: test_mul_by_66:
18381838
; X86: # %bb.0:
1839-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1840-
; X86-NEXT: movl %ecx, %eax
1841-
; X86-NEXT: shll $6, %eax
1842-
; X86-NEXT: addl %ecx, %eax
1843-
; X86-NEXT: addl %ecx, %eax
1839+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1840+
; X86-NEXT: movl %eax, %ecx
1841+
; X86-NEXT: shll $6, %ecx
1842+
; X86-NEXT: leal (%ecx,%eax,2), %eax
18441843
; X86-NEXT: retl
18451844
;
18461845
; X64-HSW-LABEL: test_mul_by_66:
18471846
; X64-HSW: # %bb.0:
18481847
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
18491848
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
18501849
; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50]
1851-
; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
1852-
; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
1850+
; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
18531851
; X64-HSW-NEXT: retq # sched: [7:1.00]
18541852
;
18551853
; X64-JAG-LABEL: test_mul_by_66:
18561854
; X64-JAG: # %bb.0:
18571855
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
18581856
; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
18591857
; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50]
1860-
; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
1861-
; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
1858+
; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00]
18621859
; X64-JAG-NEXT: retq # sched: [4:1.00]
18631860
;
18641861
; X86-NOOPT-LABEL: test_mul_by_66:
@@ -1943,6 +1940,63 @@ define i32 @test_mul_by_73(i32 %x) {
19431940
ret i32 %mul
19441941
}
19451942

1943+
define i32 @test_mul_by_520(i32 %x) {
1944+
; X86-LABEL: test_mul_by_520:
1945+
; X86: # %bb.0:
1946+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1947+
; X86-NEXT: movl %eax, %ecx
1948+
; X86-NEXT: shll $9, %ecx
1949+
; X86-NEXT: leal (%ecx,%eax,8), %eax
1950+
; X86-NEXT: retl
1951+
;
1952+
; X64-HSW-LABEL: test_mul_by_520:
1953+
; X64-HSW: # %bb.0:
1954+
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
1955+
; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
1956+
; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50]
1957+
; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50]
1958+
; X64-HSW-NEXT: retq # sched: [7:1.00]
1959+
;
1960+
; X64-JAG-LABEL: test_mul_by_520:
1961+
; X64-JAG: # %bb.0:
1962+
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
1963+
; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
1964+
; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50]
1965+
; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00]
1966+
; X64-JAG-NEXT: retq # sched: [4:1.00]
1967+
;
1968+
; X86-NOOPT-LABEL: test_mul_by_520:
1969+
; X86-NOOPT: # %bb.0:
1970+
; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208
1971+
; X86-NOOPT-NEXT: retl
1972+
;
1973+
; HSW-NOOPT-LABEL: test_mul_by_520:
1974+
; HSW-NOOPT: # %bb.0:
1975+
; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
1976+
; HSW-NOOPT-NEXT: # sched: [3:1.00]
1977+
; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
1978+
;
1979+
; JAG-NOOPT-LABEL: test_mul_by_520:
1980+
; JAG-NOOPT: # %bb.0:
1981+
; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
1982+
; JAG-NOOPT-NEXT: # sched: [3:1.00]
1983+
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
1984+
;
1985+
; X64-SLM-LABEL: test_mul_by_520:
1986+
; X64-SLM: # %bb.0:
1987+
; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208
1988+
; X64-SLM-NEXT: # sched: [3:1.00]
1989+
; X64-SLM-NEXT: retq # sched: [4:1.00]
1990+
;
1991+
; SLM-NOOPT-LABEL: test_mul_by_520:
1992+
; SLM-NOOPT: # %bb.0:
1993+
; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
1994+
; SLM-NOOPT-NEXT: # sched: [3:1.00]
1995+
; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
1996+
%mul = mul nsw i32 %x, 520
1997+
ret i32 %mul
1998+
}
1999+
19462000
; (x*9+42)*(x*5+2)
19472001
define i32 @test_mul_spec(i32 %x) nounwind {
19482002
; X86-LABEL: test_mul_spec:

test/CodeGen/X86/mul-constant-i64.ll

+64-6
Original file line numberDiff line numberDiff line change
@@ -1938,8 +1938,7 @@ define i64 @test_mul_by_66(i64 %x) {
19381938
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
19391939
; X86-NEXT: movl %eax, %ecx
19401940
; X86-NEXT: shll $6, %ecx
1941-
; X86-NEXT: addl %eax, %ecx
1942-
; X86-NEXT: addl %eax, %ecx
1941+
; X86-NEXT: leal (%ecx,%eax,2), %ecx
19431942
; X86-NEXT: movl $66, %eax
19441943
; X86-NEXT: mull {{[0-9]+}}(%esp)
19451944
; X86-NEXT: addl %ecx, %edx
@@ -1949,16 +1948,14 @@ define i64 @test_mul_by_66(i64 %x) {
19491948
; X64-HSW: # %bb.0:
19501949
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
19511950
; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50]
1952-
; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
1953-
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
1951+
; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
19541952
; X64-HSW-NEXT: retq # sched: [7:1.00]
19551953
;
19561954
; X64-JAG-LABEL: test_mul_by_66:
19571955
; X64-JAG: # %bb.0:
19581956
; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
19591957
; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50]
1960-
; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
1961-
; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
1958+
; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00]
19621959
; X64-JAG-NEXT: retq # sched: [4:1.00]
19631960
;
19641961
; X86-NOOPT-LABEL: test_mul_by_66:
@@ -2049,6 +2046,67 @@ define i64 @test_mul_by_73(i64 %x) {
20492046
ret i64 %mul
20502047
}
20512048

2049+
define i64 @test_mul_by_520(i64 %x) {
2050+
; X86-LABEL: test_mul_by_520:
2051+
; X86: # %bb.0:
2052+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2053+
; X86-NEXT: movl %eax, %ecx
2054+
; X86-NEXT: shll $9, %ecx
2055+
; X86-NEXT: leal (%ecx,%eax,8), %ecx
2056+
; X86-NEXT: movl $520, %eax # imm = 0x208
2057+
; X86-NEXT: mull {{[0-9]+}}(%esp)
2058+
; X86-NEXT: addl %ecx, %edx
2059+
; X86-NEXT: retl
2060+
;
2061+
; X64-HSW-LABEL: test_mul_by_520:
2062+
; X64-HSW: # %bb.0:
2063+
; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
2064+
; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50]
2065+
; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50]
2066+
; X64-HSW-NEXT: retq # sched: [7:1.00]
2067+
;
2068+
; X64-JAG-LABEL: test_mul_by_520:
2069+
; X64-JAG: # %bb.0:
2070+
; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
2071+
; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50]
2072+
; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00]
2073+
; X64-JAG-NEXT: retq # sched: [4:1.00]
2074+
;
2075+
; X86-NOOPT-LABEL: test_mul_by_520:
2076+
; X86-NOOPT: # %bb.0:
2077+
; X86-NOOPT-NEXT: movl $520, %eax # imm = 0x208
2078+
; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
2079+
; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %ecx # imm = 0x208
2080+
; X86-NOOPT-NEXT: addl %ecx, %edx
2081+
; X86-NOOPT-NEXT: retl
2082+
;
2083+
; HSW-NOOPT-LABEL: test_mul_by_520:
2084+
; HSW-NOOPT: # %bb.0:
2085+
; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
2086+
; HSW-NOOPT-NEXT: # sched: [3:1.00]
2087+
; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
2088+
;
2089+
; JAG-NOOPT-LABEL: test_mul_by_520:
2090+
; JAG-NOOPT: # %bb.0:
2091+
; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
2092+
; JAG-NOOPT-NEXT: # sched: [6:4.00]
2093+
; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
2094+
;
2095+
; X64-SLM-LABEL: test_mul_by_520:
2096+
; X64-SLM: # %bb.0:
2097+
; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208
2098+
; X64-SLM-NEXT: # sched: [3:1.00]
2099+
; X64-SLM-NEXT: retq # sched: [4:1.00]
2100+
;
2101+
; SLM-NOOPT-LABEL: test_mul_by_520:
2102+
; SLM-NOOPT: # %bb.0:
2103+
; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
2104+
; SLM-NOOPT-NEXT: # sched: [3:1.00]
2105+
; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
2106+
%mul = mul nsw i64 %x, 520
2107+
ret i64 %mul
2108+
}
2109+
20522110
; (x*9+42)*(x*5+2)
20532111
define i64 @test_mul_spec(i64 %x) nounwind {
20542112
; X86-LABEL: test_mul_spec:

0 commit comments

Comments
 (0)