Skip to content

Commit b01adc6

Browse files
committed
AMDGPU: Strengthen some bfloat tests
Fix the bitcast test, in which the phis intended to force bitcasts to survive all the way to selection were being split apart. Disable the amdgpu-codegenprepare phi splitting, which defeats the technique of using a phi to ensure a bitcast reaches all the way to selection. Also add a variety of bfloat tests. These probably need revisiting to avoid the cast folding into argument loads. Also round out the set of bfloat bitcast and ABI tests. Add codegen tests for more bf16 operations. The promotion of these works contrary to the comment.
1 parent 9e574a3 commit b01adc6

11 files changed

+20539
-132
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll

Lines changed: 1259 additions & 4 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 12801 additions & 11 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/function-args-inreg.ll

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,5 +1870,149 @@ define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inr
18701870
ret void
18711871
}
18721872

1873+
; Pass a single bfloat as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the argument from s4 on
; GFX9 and s0 on GFX11, copies it to v0, and writes it with a d16_hi 16-bit
; global store.
; NOTE(review): the d16_hi store suggests the bfloat is carried in the upper
; half of the 32-bit register -- confirm against the argument lowering.
define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
1874+
; GFX9-LABEL: void_func_bf16_inreg:
1875+
; GFX9: ; %bb.0:
1876+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1877+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1878+
; GFX9-NEXT: global_store_short_d16_hi v[0:1], v0, off
1879+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1880+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1881+
;
1882+
; GFX11-LABEL: void_func_bf16_inreg:
1883+
; GFX11: ; %bb.0:
1884+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1885+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
1886+
; GFX11-NEXT: global_store_d16_hi_b16 v[0:1], v0, off
1887+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1888+
store bfloat %arg0, ptr addrspace(1) undef
1889+
ret void
1890+
}
1891+
1892+
; Pass a <2 x bfloat> as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the whole vector from one
; SGPR (s4 on GFX9, s0 on GFX11), copies it to v0, and writes it with a
; single 32-bit global store.
define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
1893+
; GFX9-LABEL: void_func_v2bf16_inreg:
1894+
; GFX9: ; %bb.0:
1895+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1896+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1897+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
1898+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1899+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1900+
;
1901+
; GFX11-LABEL: void_func_v2bf16_inreg:
1902+
; GFX11: ; %bb.0:
1903+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1904+
; GFX11-NEXT: v_mov_b32_e32 v0, s0
1905+
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
1906+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1907+
store <2 x bfloat> %arg0, ptr addrspace(1) undef
1908+
ret void
1909+
}
1910+
1911+
; Pass a <3 x bfloat> as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the vector from two SGPRs
; (s4/s5 on GFX9, s0/s1 on GFX11) and splits the store in two: a 16-bit
; store of the second register and a 32-bit store of the first.
; Both stores are printed with the same v[0:1] address operand and no offset;
; the pointer operand in the IR is undef.
define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
1912+
; GFX9-LABEL: void_func_v3bf16_inreg:
1913+
; GFX9: ; %bb.0:
1914+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1915+
; GFX9-NEXT: v_mov_b32_e32 v0, s5
1916+
; GFX9-NEXT: global_store_short v[0:1], v0, off
1917+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1918+
; GFX9-NEXT: global_store_dword v[0:1], v0, off
1919+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1920+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1921+
;
1922+
; GFX11-LABEL: void_func_v3bf16_inreg:
1923+
; GFX11: ; %bb.0:
1924+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1925+
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
1926+
; GFX11-NEXT: s_clause 0x1
1927+
; GFX11-NEXT: global_store_b16 v[0:1], v0, off
1928+
; GFX11-NEXT: global_store_b32 v[0:1], v1, off
1929+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1930+
store <3 x bfloat> %arg0, ptr addrspace(1) undef
1931+
ret void
1932+
}
1933+
1934+
; Pass a <4 x bfloat> as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the vector from two SGPRs
; (s4/s5 on GFX9, s0/s1 on GFX11), copies them to v0/v1, and writes them
; with a single 64-bit global store.
define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
1935+
; GFX9-LABEL: void_func_v4bf16_inreg:
1936+
; GFX9: ; %bb.0:
1937+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1938+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1939+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
1940+
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
1941+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1942+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1943+
;
1944+
; GFX11-LABEL: void_func_v4bf16_inreg:
1945+
; GFX11: ; %bb.0:
1946+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1947+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1948+
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
1949+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1950+
store <4 x bfloat> %arg0, ptr addrspace(1) undef
1951+
ret void
1952+
}
1953+
1954+
; Pass an <8 x bfloat> as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the vector from four SGPRs
; (s4-s7 on GFX9, s0-s3 on GFX11), copies them to v0-v3, and writes them
; with a single 128-bit global store.
define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
1955+
; GFX9-LABEL: void_func_v8bf16_inreg:
1956+
; GFX9: ; %bb.0:
1957+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1959+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
1960+
; GFX9-NEXT: v_mov_b32_e32 v2, s6
1961+
; GFX9-NEXT: v_mov_b32_e32 v3, s7
1962+
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1963+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1964+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1965+
;
1966+
; GFX11-LABEL: void_func_v8bf16_inreg:
1967+
; GFX11: ; %bb.0:
1968+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969+
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1970+
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
1971+
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
1972+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1973+
store <8 x bfloat> %arg0, ptr addrspace(1) undef
1974+
ret void
1975+
}
1976+
1977+
; Pass a <16 x bfloat> as an inreg argument and store it to an undef
; addrspace(1) pointer. The expected output reads the vector from eight
; SGPRs (s4-s11 on GFX9, s0-s7 on GFX11) and writes it as two 128-bit
; global stores. In both check sequences the store of the upper four SGPRs
; comes first; both stores are printed with the same v[0:1] address operand
; and no offset (the pointer operand in the IR is undef).
define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
1978+
; GFX9-LABEL: void_func_v16bf16_inreg:
1979+
; GFX9: ; %bb.0:
1980+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981+
; GFX9-NEXT: v_mov_b32_e32 v0, s8
1982+
; GFX9-NEXT: v_mov_b32_e32 v1, s9
1983+
; GFX9-NEXT: v_mov_b32_e32 v2, s10
1984+
; GFX9-NEXT: v_mov_b32_e32 v3, s11
1985+
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1986+
; GFX9-NEXT: s_nop 0
1987+
; GFX9-NEXT: v_mov_b32_e32 v0, s4
1988+
; GFX9-NEXT: v_mov_b32_e32 v1, s5
1989+
; GFX9-NEXT: v_mov_b32_e32 v2, s6
1990+
; GFX9-NEXT: v_mov_b32_e32 v3, s7
1991+
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
1992+
; GFX9-NEXT: s_waitcnt vmcnt(0)
1993+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1994+
;
1995+
; GFX11-LABEL: void_func_v16bf16_inreg:
1996+
; GFX11: ; %bb.0:
1997+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1998+
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
1999+
; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
2000+
; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
2001+
; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
2002+
; GFX11-NEXT: s_clause 0x1
2003+
; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off
2004+
; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off
2005+
; GFX11-NEXT: s_setpc_b64 s[30:31]
2006+
store <16 x bfloat> %arg0, ptr addrspace(1) undef
2007+
ret void
2008+
}
2009+
18732010
; Attribute groups referenced by the definitions in this file.
; #0 (nounwind) is the group attached to the void_func_*_inreg functions.
; #1 adds noinline to nounwind; no use of it is visible in this excerpt.
attributes #0 = { nounwind }
18742011
attributes #1 = { nounwind noinline }
2012+
2013+
2014+
2015+
2016+
2017+
2018+

0 commit comments

Comments
 (0)