Skip to content

Commit 33eb4d9

Browse files
author
Marek Olsak
committed
AMDGPU: Add a fast path for icmp.i1(src, false, NE)
Summary: This allows moving the condition out of the intrinsic and into the standard ICmp opcode, so that LLVM can perform its usual simplifications on it. In the form icmp.i1(src, false, NE), the intrinsic acts as an identity that retrieves the SGPR mask of its i1 source. The mask can likewise be obtained from the results of "and i1", "or i1", and "xor i1".
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D52060
llvm-svn: 351150
1 parent f793fe1 commit 33eb4d9

File tree

5 files changed

+226
-0
lines changed

5 files changed

+226
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5355,6 +5355,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
53555355
Denominator, Numerator);
53565356
}
53575357
case Intrinsic::amdgcn_icmp: {
5358+
// There is a Pat that handles this variant, so return it as-is.
5359+
if (Op.getOperand(1).getValueType() == MVT::i1 &&
5360+
Op.getConstantOperandVal(2) == 0 &&
5361+
Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
5362+
return Op;
53585363
return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
53595364
}
53605365
case Intrinsic::amdgcn_fcmp: {

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,11 @@ def : Pat <
583583

584584
// TODO: we could add more variants for other types of conditionals
585585

586+
def : Pat <
587+
(int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
588+
(COPY $src) // Return the SGPRs representing i1 src
589+
>;
590+
586591
//===----------------------------------------------------------------------===//
587592
// VOP1 Patterns
588593
//===----------------------------------------------------------------------===//

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3760,6 +3760,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
37603760
// Promote to next legal integer type.
37613761
unsigned Width = CmpType->getBitWidth();
37623762
unsigned NewWidth = Width;
3763+
3764+
// Don't do anything for i1 comparisons.
3765+
if (Width == 1)
3766+
break;
3767+
37633768
if (Width <= 16)
37643769
NewWidth = 16;
37653770
else if (Width <= 32)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
55
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
66
declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
7+
declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
78

89
; No crash on invalid input
910
; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
314315
ret void
315316
}
316317

318+
; GCN-LABEL: {{^}}v_icmp_i1_ne0:
319+
; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
320+
; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
321+
; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
322+
; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
323+
; GCN-NEXT: v_mov_b32_e32
324+
; GCN-NEXT: v_mov_b32_e32
325+
; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
326+
define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
327+
%c0 = icmp ugt i32 %a, 1
328+
%c1 = icmp ugt i32 %b, 2
329+
%src = and i1 %c0, %c1
330+
%result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
331+
store i64 %result, i64 addrspace(1)* %out
332+
ret void
333+
}
334+
317335
attributes #0 = { nounwind readnone convergent }

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,6 +1406,7 @@ define float @fmed3_0_1_undef_f32() {
14061406

14071407
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
14081408
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
1409+
declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) nounwind readnone convergent
14091410

14101411
; Make sure there's no crash for invalid input
14111412
; CHECK-LABEL: @invalid_nonconstant_icmp_code(
@@ -1815,6 +1816,198 @@ define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
18151816
ret i64 %mask
18161817
}
18171818

1819+
; 1-bit NE comparisons
1820+
1821+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
1822+
; CHECK-NEXT: icmp
1823+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1824+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
1825+
%cmp = icmp eq i32 %a, %b
1826+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1827+
ret i64 %mask
1828+
}
1829+
1830+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
1831+
; CHECK-NEXT: icmp
1832+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1833+
define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
1834+
%cmp = icmp ne i32 %a, %b
1835+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1836+
ret i64 %mask
1837+
}
1838+
1839+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
1840+
; CHECK-NEXT: icmp
1841+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1842+
define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
1843+
%cmp = icmp sle i32 %a, %b
1844+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1845+
ret i64 %mask
1846+
}
1847+
1848+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
1849+
; CHECK-NEXT: icmp
1850+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1851+
define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
1852+
%cmp = icmp ugt i64 %a, %b
1853+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1854+
ret i64 %mask
1855+
}
1856+
1857+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
1858+
; CHECK-NEXT: icmp
1859+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1860+
define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
1861+
%cmp = icmp ugt i64 %a, %b
1862+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 false, i1 %cmp, i32 33)
1863+
ret i64 %mask
1864+
}
1865+
1866+
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
1867+
; CHECK-NEXT: fcmp
1868+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1869+
define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
1870+
%cmp = fcmp oeq float %a, %b
1871+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1872+
ret i64 %mask
1873+
}
1874+
1875+
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
1876+
; CHECK-NEXT: fcmp
1877+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1878+
define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
1879+
%cmp = fcmp une float %a, %b
1880+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1881+
ret i64 %mask
1882+
}
1883+
1884+
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
1885+
; CHECK-NEXT: fcmp
1886+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1887+
define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
1888+
%cmp = fcmp olt double %a, %b
1889+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1890+
ret i64 %mask
1891+
}
1892+
1893+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
1894+
; CHECK-NEXT: icmp
1895+
; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1896+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
1897+
%cmp = icmp eq i4 %a, %b
1898+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1899+
ret i64 %mask
1900+
}
1901+
1902+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
1903+
; CHECK-NEXT: icmp
1904+
; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1905+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
1906+
%cmp = icmp eq i8 %a, %b
1907+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1908+
ret i64 %mask
1909+
}
1910+
1911+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
1912+
; CHECK-NEXT: icmp
1913+
; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1914+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
1915+
%cmp = icmp eq i16 %a, %b
1916+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1917+
ret i64 %mask
1918+
}
1919+
1920+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
1921+
; CHECK-NEXT: icmp
1922+
; CHECK: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1923+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
1924+
%cmp = icmp eq i36 %a, %b
1925+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1926+
ret i64 %mask
1927+
}
1928+
1929+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
1930+
; CHECK-NEXT: icmp
1931+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1932+
define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
1933+
%cmp = icmp eq i128 %a, %b
1934+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1935+
ret i64 %mask
1936+
}
1937+
1938+
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
1939+
; CHECK-NEXT: fcmp
1940+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1941+
define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
1942+
%cmp = fcmp oeq half %a, %b
1943+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1944+
ret i64 %mask
1945+
}
1946+
1947+
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
1948+
; CHECK-NEXT: fcmp
1949+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1950+
define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
1951+
;
1952+
%cmp = fcmp oeq fp128 %a, %b
1953+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1954+
ret i64 %mask
1955+
}
1956+
1957+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
1958+
; CHECK-NEXT: icmp
1959+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1960+
define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
1961+
%cmp = icmp slt i4 %a, %b
1962+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1963+
ret i64 %mask
1964+
}
1965+
1966+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
1967+
; CHECK-NEXT: icmp
1968+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1969+
define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
1970+
%cmp = icmp slt i8 %a, %b
1971+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1972+
ret i64 %mask
1973+
}
1974+
1975+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
1976+
; CHECK-NEXT: icmp
1977+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1978+
define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
1979+
%cmp = icmp slt i16 %a, %b
1980+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1981+
ret i64 %mask
1982+
}
1983+
1984+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
1985+
; CHECK-NEXT: icmp
1986+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1987+
define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
1988+
%cmp = icmp ult i4 %a, %b
1989+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1990+
ret i64 %mask
1991+
}
1992+
1993+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
1994+
; CHECK-NEXT: icmp
1995+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1996+
define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
1997+
%cmp = icmp ult i8 %a, %b
1998+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
1999+
ret i64 %mask
2000+
}
2001+
2002+
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
2003+
; CHECK-NEXT: icmp
2004+
; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
2005+
define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
2006+
%cmp = icmp ult i16 %a, %b
2007+
%mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
2008+
ret i64 %mask
2009+
}
2010+
18182011
; --------------------------------------------------------------------
18192012
; llvm.amdgcn.fcmp
18202013
; --------------------------------------------------------------------

0 commit comments

Comments
 (0)