Skip to content

Commit a747e86

Browse files
authored
[InstCombine] Fold fpto{s|u}i non-norm to zero (#85569)
This patch enables more optimization after canonicalizing `fmul X, 0.0` into a copysign. I decided to implement this fold in InstCombine because `computeKnownFPClass` may be expensive. Alive2: https://alive2.llvm.org/ce/z/ASM8tQ
1 parent 6aaf9c8 commit a747e86

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1923,17 +1923,36 @@ Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
19231923
return replaceInstUsesWith(FI, X);
19241924
}
19251925

1926+
/// Try to fold an fptoui/fptosi instruction \p FI to a zero constant.
///
/// The fold fires when the operand is known never to belong to the classes in
/// Mask: fcPosNormal for fptoui, fcNormal (either sign) for every other opcode
/// passed in. \p IC supplies the simplify query and performs the replacement.
///
/// \returns the result of IC.replaceInstUsesWith when the fold applies,
/// nullptr otherwise.
static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
1927+
// fpto{u/s}i non-norm --> 0
// NOTE(review): presumably sound because zero/subnormal inputs truncate to 0
// and inf/NaN inputs yield poison -- confirm against the LangRef.
1928+
// Only the classes in Mask are of interest; for fptoui the negative normals
// do not need to be ruled out.
FPClassTest Mask =
1929+
FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
1930+
// Query only the interesting classes, using FI as the context instruction.
KnownFPClass FPClass =
1931+
computeKnownFPClass(FI.getOperand(0), Mask, /*Depth=*/0,
1932+
IC.getSimplifyQuery().getWithInstruction(&FI));
1933+
// getNullValue on FI's own type is used for the replacement constant.
if (FPClass.isKnownNever(Mask))
1934+
return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));
1935+
1936+
// Operand may still be a (positive) normal value: no fold.
return nullptr;
1937+
}
1938+
19261939
/// Visitor for fptoui. Tries foldItoFPtoI, then foldFPtoI, and returns the
/// first non-null result; otherwise defers to commonCastTransforms.
Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
19271940
if (Instruction *I = foldItoFPtoI(FI))
19281941
return I;
19291942

1943+
// Fold to zero when the operand is known never to be a positive normal.
if (Instruction *I = foldFPtoI(FI, *this))
1944+
return I;
1945+
19301946
return commonCastTransforms(FI);
19311947
}
19321948

19331949
/// Visitor for fptosi. Tries foldItoFPtoI, then foldFPtoI, and returns the
/// first non-null result; otherwise defers to commonCastTransforms.
Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
19331950
if (Instruction *I = foldItoFPtoI(FI))
19341951
return I;
19351952

1953+
// Fold to zero when the operand is known never to be a normal value.
if (Instruction *I = foldFPtoI(FI, *this))
1954+
return I;
1955+
19361956
return commonCastTransforms(FI);
19371957
}
19391958

llvm/test/Transforms/InstCombine/fpcast.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,91 @@ define double @masked_uint_to_fpext3(i32 %x) {
347347
%r = fpext float %f to double
348348
ret double %r
349349
}
350+
351+
; Positive test: %x is nofpclass(norm), so the fptosi is expected to fold to 0.
define i32 @fptosi_nonnorm(float nofpclass(norm) %x) {
352+
; CHECK-LABEL: @fptosi_nonnorm(
353+
; CHECK-NEXT: ret i32 0
354+
;
355+
%ret = fptosi float %x to i32
356+
ret i32 %ret
357+
}
358+
359+
; Positive test: for fptoui it is enough that %x is nofpclass(pnorm); the
; conversion is expected to fold to 0.
define i32 @fptoui_nonnorm(float nofpclass(pnorm) %x) {
360+
; CHECK-LABEL: @fptoui_nonnorm(
361+
; CHECK-NEXT: ret i32 0
362+
;
363+
%ret = fptoui float %x to i32
364+
ret i32 %ret
365+
}
366+
367+
; Negative test: only negative normals are excluded (nofpclass(nnorm)), so the
; fptosi is expected to remain.
define i32 @fptosi_nonnnorm(float nofpclass(nnorm) %x) {
368+
; CHECK-LABEL: @fptosi_nonnnorm(
369+
; CHECK-NEXT: [[RET:%.*]] = fptosi float [[X:%.*]] to i32
370+
; CHECK-NEXT: ret i32 [[RET]]
371+
;
372+
%ret = fptosi float %x to i32
373+
ret i32 %ret
374+
}
375+
376+
; Negative test: excluding only negative normals (nofpclass(nnorm)) does not
; rule out positive normals, so the fptoui is expected to remain.
define i32 @fptoui_nonnnorm(float nofpclass(nnorm) %x) {
377+
; CHECK-LABEL: @fptoui_nonnnorm(
378+
; CHECK-NEXT: [[RET:%.*]] = fptoui float [[X:%.*]] to i32
379+
; CHECK-NEXT: ret i32 [[RET]]
380+
;
381+
%ret = fptoui float %x to i32
382+
ret i32 %ret
383+
}
384+
385+
; Positive test: the operand is copysign(0.0, %x); the fptosi is expected to
; fold to 0.
define i32 @fptosi_nonnorm_copysign(float %x) {
386+
; CHECK-LABEL: @fptosi_nonnorm_copysign(
387+
; CHECK-NEXT: ret i32 0
388+
;
389+
%val = call float @llvm.copysign.f32(float 0.0, float %x)
390+
%ret = fptosi float %val to i32
391+
ret i32 %ret
392+
}
393+
394+
; Vector variant of the copysign test: the expected result is zeroinitializer.
define <2 x i32> @fptosi_nonnorm_copysign_vec(<2 x float> %x) {
395+
; CHECK-LABEL: @fptosi_nonnorm_copysign_vec(
396+
; CHECK-NEXT: ret <2 x i32> zeroinitializer
397+
;
398+
%val = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> %x)
399+
%ret = fptosi <2 x float> %val to <2 x i32>
400+
ret <2 x i32> %ret
401+
}
402+
403+
; The operand is a plain fmul by 0.0; the CHECK lines expect both the fmul and
; the fptosi to remain.
define i32 @fptosi_nonnorm_fmul(float %x) {
404+
; CHECK-LABEL: @fptosi_nonnorm_fmul(
405+
; CHECK-NEXT: [[SEL:%.*]] = fmul float [[X:%.*]], 0.000000e+00
406+
; CHECK-NEXT: [[RET:%.*]] = fptosi float [[SEL]] to i32
407+
; CHECK-NEXT: ret i32 [[RET]]
408+
;
409+
%sel = fmul float %x, 0.000000e+00
410+
%ret = fptosi float %sel to i32
411+
ret i32 %ret
412+
}
413+
414+
; The operand is a select between 1.0 and -1.0; the expected output is an
; integer select of 1 / -1, not a fold to zero.
define i32 @fptosi_select(i1 %cond) {
415+
; CHECK-LABEL: @fptosi_select(
416+
; CHECK-NEXT: [[RET:%.*]] = select i1 [[COND:%.*]], i32 1, i32 -1
417+
; CHECK-NEXT: ret i32 [[RET]]
418+
;
419+
%sel = select i1 %cond, float 1.0, float -1.0
420+
%ret = fptosi float %sel to i32
421+
ret i32 %ret
422+
}
423+
424+
; sitofp -> fmul by 0.0 -> fptosi chain; the CHECK lines expect all three
; instructions to remain.
define i32 @mul_pos_zero_convert(i32 %a) {
425+
; CHECK-LABEL: @mul_pos_zero_convert(
426+
; CHECK-NEXT: entry:
427+
; CHECK-NEXT: [[FP:%.*]] = sitofp i32 [[A:%.*]] to float
428+
; CHECK-NEXT: [[RET:%.*]] = fmul float [[FP]], 0.000000e+00
429+
; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[RET]] to i32
430+
; CHECK-NEXT: ret i32 [[CONV]]
431+
;
432+
entry:
433+
%fp = sitofp i32 %a to float
434+
%ret = fmul float %fp, 0.000000e+00
435+
%conv = fptosi float %ret to i32
436+
ret i32 %conv
437+
}

0 commit comments

Comments
 (0)