Skip to content

Commit 08dd040

Browse files
authored
AMDGPU: Use minnum instead of maxnum for fmed3 src2-nan fold (#139531)
According to the pseudocode in the ISA manual, if any input is a NaN, fmed3 behaves like min3, which folds to a two-operand min (minnum) of the other operands. The other NaN cases already fold to minnum; I'm not sure how this one ended up folding to maxnum instead.
1 parent bfd2ef7 commit 08dd040

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
867867
} else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
868868
V = IC.Builder.CreateMinNum(Src0, Src2);
869869
} else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
870-
V = IC.Builder.CreateMaxNum(Src0, Src1);
870+
V = IC.Builder.CreateMinNum(Src0, Src1);
871871
}
872872

873873
if (V) {

llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ define float @fmed3_x_undef_y_f32(float %x, float %y) #1 {
117117
define float @fmed3_x_y_undef_f32(float %x, float %y) #1 {
118118
; CHECK-LABEL: define float @fmed3_x_y_undef_f32(
119119
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
120-
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
120+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
121121
; CHECK-NEXT: ret float [[MED3]]
122122
;
123123
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
@@ -147,7 +147,7 @@ define float @fmed3_x_qnan0_y_f32(float %x, float %y) #1 {
147147
define float @fmed3_x_y_qnan0_f32(float %x, float %y) #1 {
148148
; CHECK-LABEL: define float @fmed3_x_y_qnan0_f32(
149149
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
150-
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
150+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
151151
; CHECK-NEXT: ret float [[MED3]]
152152
;
153153
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
@@ -276,7 +276,7 @@ define float @fmed3_0_nan_1_f32() #1 {
276276
define float @fmed3_0_1_nan_f32() #1 {
277277
; CHECK-LABEL: define float @fmed3_0_1_nan_f32(
278278
; CHECK-SAME: ) #[[ATTR1]] {
279-
; CHECK-NEXT: ret float 1.000000e+00
279+
; CHECK-NEXT: ret float 0.000000e+00
280280
;
281281
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8001000000000)
282282
ret float %med
@@ -303,7 +303,7 @@ define float @fmed3_0_undef_1_f32() #1 {
303303
define float @fmed3_0_1_undef_f32() #1 {
304304
; CHECK-LABEL: define float @fmed3_0_1_undef_f32(
305305
; CHECK-SAME: ) #[[ATTR1]] {
306-
; CHECK-NEXT: ret float 1.000000e+00
306+
; CHECK-NEXT: ret float 0.000000e+00
307307
;
308308
%med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float undef)
309309
ret float %med
@@ -359,7 +359,7 @@ define float @fmed3_x_snan1_y_f32(float %x, float %y) #1 {
359359
define float @fmed3_x_y_snan1_f32(float %x, float %y) #1 {
360360
; CHECK-LABEL: define float @fmed3_x_y_snan1_f32(
361361
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
362-
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
362+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
363363
; CHECK-NEXT: ret float [[MED3]]
364364
;
365365
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -414,7 +414,7 @@ define float @fmed3_snan1_neg1_2_f32(float %x, float %y) #1 {
414414
define float @fmed3_neg2_3_snan1_f32(float %x, float %y) #1 {
415415
; CHECK-LABEL: define float @fmed3_neg2_3_snan1_f32(
416416
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
417-
; CHECK-NEXT: ret float 3.000000e+00
417+
; CHECK-NEXT: ret float -2.000000e+00
418418
;
419419
%med3 = call float @llvm.amdgcn.fmed3.f32(float -2.0, float 3.0, float 0x7FF4000000000000)
420420
ret float %med3
@@ -447,7 +447,7 @@ define amdgpu_ps float @amdgpu_ps_default_fmed3_x_snan1_y_f32(float %x, float %y
447447
define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(float %x, float %y) {
448448
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_default_fmed3_x_y_snan1_f32(
449449
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR2]] {
450-
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
450+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
451451
; CHECK-NEXT: ret float [[MED3]]
452452
;
453453
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)
@@ -480,7 +480,7 @@ define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_snan1_y_f32(float %x, float %y) #
480480
define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(float %x, float %y) #1 {
481481
; CHECK-LABEL: define amdgpu_ps float @amdgpu_ps_attr_fmed3_x_y_snan1_f32(
482482
; CHECK-SAME: float [[X:%.*]], float [[Y:%.*]]) #[[ATTR1]] {
483-
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.maxnum.f32(float [[X]], float [[Y]])
483+
; CHECK-NEXT: [[MED3:%.*]] = call float @llvm.minnum.f32(float [[X]], float [[Y]])
484484
; CHECK-NEXT: ret float [[MED3]]
485485
;
486486
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF4000000000000)

0 commit comments

Comments
 (0)