-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Replace some float undef test uses with poison #131090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Replace some float undef test uses with poison #131090
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesPatch is 135.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131090.diff 63 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
index 2d95008e3fd2d..f88d0e07f284e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
@@ -22,7 +22,7 @@ main_body:
%vcc = icmp eq i32 %val, 2
%a = select i1 %vcc, float %a0, float %a1
%b = select i1 %vcc, float %b0, float %b1
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float undef, float undef, i1 true, i1 true)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float poison, float poison, i1 true, i1 true)
ret void
}
@@ -52,7 +52,7 @@ main_body:
%uniform_i1 = icmp eq i32 %val, 2
%a = select i1 %uniform_i1, float %a0, float %a1
%b = select i1 %uniform_i1, float %b0, float %b1
- call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float undef, float undef, i1 true, i1 true)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float poison, float poison, i1 true, i1 true)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
index b4eb7e2f887e3..13a89372132a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -10,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}
@@ -23,7 +23,7 @@ define amdgpu_vs void @test_f32(float %arg0) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}
@@ -55,7 +55,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0
; CHECK-NEXT: S_ENDPGM 0
%tmp0 = load volatile i32, ptr addrspace(4) %arg1
- call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
index 459cdbd9067e0..ec893feb8d9cb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
@@ -1543,7 +1543,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %ou
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, 0x41000000
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
- %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float poison, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %ou
; GFX11-NEXT: v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
- %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float poison, float 8.0, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
@@ -1633,7 +1633,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %
; GFX11-NEXT: v_div_scale_f32 v0, null, s0, s0, s0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_endpgm
- %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
+ %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float poison, float poison, i1 false)
%result0 = extractvalue { float, i1 } %result, 0
store float %result0, ptr addrspace(1) %out, align 4
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index 36455f190510e..67ec5cbdf9186 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -82,7 +82,7 @@ define float @v_mul_legacy_undef0_f32(float %a) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
+ %result = call float @llvm.amdgcn.fmul.legacy(float poison, float %a)
ret float %result
}
@@ -122,7 +122,7 @@ define float @v_mul_legacy_undef1_f32(float %a) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
+ %result = call float @llvm.amdgcn.fmul.legacy(float %a, float poison)
ret float %result
}
@@ -162,7 +162,7 @@ define float @v_mul_legacy_undef_f32() {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, s0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
- %result = call float @llvm.amdgcn.fmul.legacy(float undef, float undef)
+ %result = call float @llvm.amdgcn.fmul.legacy(float poison, float poison)
ret float %result
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index bf8e10143003a..c70a2e6ee6758 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -175,7 +175,7 @@ define float @v_rsq_clamp_undef_f32() #0 {
; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_minmax_num_f32 v0, s0, 0x7f7fffff, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
- %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
+ %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float poison)
ret float %rsq_clamp
}
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index dd85edf59b18f..d3bf94e87d90e 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -7,7 +7,7 @@
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -23,7 +23,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -39,7 +39,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -55,7 +55,7 @@ main_body:
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
- %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
+ %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
- %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
+ %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float poison, <8 x i32> poison, <4 x i32> poison, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index 7aaf599583c80..a4f9ce3e7350a 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -4009,7 +4009,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1
; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
-; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
+; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1
; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
@@ -4049,9 +4049,9 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0
; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01)
; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]])
-; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
+; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0
-; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef)
+; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison)
; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]]
; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]]
; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]])
@@ -4079,7 +4079,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0
; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1
; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]])
-; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef)
+; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison)
; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]]
@@ -4101,7 +4101,7 @@ define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float>
; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3
; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]]
;
- %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float undef, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
+ %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2
ret <4 x float> %const.partial.rcp
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
index f3a9e761605a0..1af5938a5d48f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -17,7 +17,7 @@
define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
%rcf = bitcast i32 %rc to float
- call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %rcf, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %rcf, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
index 640f541172f4b..f8a1604351d9e 100644
--- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -37,7 +37,7 @@ bb2:
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) {
bb0:
%tmp = icmp sgt i32 %arg1, 4
- %undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
+ %undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef)
%tmp4 = select i1 %undef, float %arg, float 1.000000e+00
%tmp5 = fcmp ogt float %arg2, 0.000000e+00
%tmp6 = fcmp olt float %arg2, 1.000000e+00
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 1e40b4c9f04cf..2930c6efd02b7 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -38,7 +38,7 @@ bb:
%tmp5 = and i32 %tmp2, %tmp4
%tmp6 = icmp eq i32 %tmp5, 0
%tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
- %tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp7)
+ %tmp8 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %tmp7)
%tmp9 = bitcast <2 x half> %tmp8 to float
ret float %tmp9
}
diff --git a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
index c73a3b69d3716..04483baafc990 100644
--- a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
+++ b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
@@ -31,7 +31,7 @@ bb9: ; preds = %bb5
bb10: ; preds = %bb9, %bb5, %bb3, %bb
%tmp11 = phi float [ 1.000000e+00, %bb3 ], [ 0.000000e+00, %bb9 ], [ 1.000000e+00, %bb ], [ poison, %bb5 ]
- call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp11, float undef, float undef, float undef, i1 false, i1 false) #0
+ call void @llvm.amdgcn.exp.f32(i32 40, i32 15, float %tmp11, float poison, float poison, float poison, i1 false, i1 false) #0
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 4da5d727acb1b..03b9f9bf82f3c 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -143,21 +143,21 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX11-NEXT: v_max_f32_e32 v0, 0, v1
; GFX11-NEXT: ; return to shader part epilog
.entry:
- %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
+ %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2243 = extractelement <3 x float> %0, i32 2
%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 0, i32 0)
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%3 = bitcast <4 x i32> %2 to <4 x float>
%.i2248 = extractelement <4 x float> %3, i32 2
%.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
- %4 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
- %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
+ %4 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
+ %5 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i2333 = extractelement <3 x float> %5, i32 2
- %6 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
- %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
+ %6 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float poison, float 0.000000e+00, float 1.000000e+00)
+ %7 = call <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 3, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%.i1408 = extractelement <2 x float> %7, i32 1
%.i0364 = extractelement <2 x float> %7, i32 0
- %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
+ %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
%9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 112, i32 0)
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%11 = bitcast <4 x i32> %10 to <4 x float>
@@ -175,7 +175,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
%.i2383 = fmul reassoc nnan nsz arcp contract afn float %.i2376, %6
%.i2386 = fadd reassoc nnan nsz arcp contract afn float %.i2370, %.i2383
- %18 = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fmed3.f32(float undef, float 0.000000e+00, float 1.000000e+00)
+ %18 = call reassoc nna...
[truncated]
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 1eb5588457c39a924902971b7d883e319a499c17 80e70c7d3063f54130ad85f690e8a43cc941e14e llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll llvm/test/CodeGen/AMDGPU/commute-shifts.ll llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll llvm/test/CodeGen/AMDGPU/llvm.exp.ll llvm/test/CodeGen/AMDGPU/llvm.exp10.ll llvm/test/CodeGen/AMDGPU/llvm.exp2.ll llvm/test/CodeGen/AMDGPU/llvm.log.ll llvm/test/CodeGen/AMDGPU/llvm.log10.ll llvm/test/CodeGen/AMDGPU/llvm.log2.ll llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll llvm/test/CodeGen/AMDGPU/memory_clause.ll llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll llvm/test/CodeGen/AMDGPU/rename-disconnected-bug.ll llvm/test/CodeGen/AMDGPU/ret.ll llvm/test/CodeGen/AMDGPU/scratch-simple.ll llvm/test/CodeGen/AMDGPU/select-undef.ll llvm/test/CodeGen/AMDGPU/sgpr-copy.ll llvm/test/CodeGen/AMDGPU/si-spill-cf.ll llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll llvm/test/CodeGen/AMDGPU/skip-if-dead.ll llvm/test/CodeGen/AMDGPU/smrd.ll llvm/test/CodeGen/AMDGPU/split-smrd.ll llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll llvm/test/CodeGen/AMDGPU/wave32.ll llvm/test/CodeGen/AMDGPU/wqm.ll llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
@@ -37,7 +37,7 @@ bb2: | |||
define amdgpu_kernel void @preserve_condition_undef_flag(float %arg, i32 %arg1, float %arg2) { | |||
bb0: | |||
%tmp = icmp sgt i32 %arg1, 4 | |||
%undef = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef) | |||
%undef = call i1 @llvm.amdgcn.class.f32(float poison, i32 undef) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
second undef is needed here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is only touching float typed uses
f66d04c
to
5d77e1c
Compare
4ed5185
to
80e70c7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
80e70c7
to
54e0c20
Compare
No description provided.