Skip to content

Commit 1e36c96

Browse files
authored
[InstCombine] Fold ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z) (#88193)
Proofs: https://alive2.llvm.org/ce/z/N9dRzP https://alive2.llvm.org/ce/z/Xrpc-Y https://alive2.llvm.org/ce/z/BagBM6
1 parent 6cba93f commit 1e36c96

File tree

2 files changed

+288
-2
lines changed

2 files changed

+288
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,54 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
12591259
match(Op1, m_SpecificIntAllowPoison(BitWidth - 1)))
12601260
return new ZExtInst(Builder.CreateIsNotNeg(X, "isnotneg"), Ty);
12611261

1262+
// ((X << nuw Z) sub nuw Y) >>u exact Z --> X sub nuw (Y >>u exact Z)
1263+
Value *Y;
1264+
if (I.isExact() &&
1265+
match(Op0, m_OneUse(m_NUWSub(m_NUWShl(m_Value(X), m_Specific(Op1)),
1266+
m_Value(Y))))) {
1267+
Value *NewLshr = Builder.CreateLShr(Y, Op1, "", /*isExact=*/true);
1268+
auto *NewSub = BinaryOperator::CreateNUWSub(X, NewLshr);
1269+
NewSub->setHasNoSignedWrap(
1270+
cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
1271+
return NewSub;
1272+
}
1273+
1274+
auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
1275+
switch (BinOpcode) {
1276+
default:
1277+
return false;
1278+
case Instruction::Add:
1279+
case Instruction::And:
1280+
case Instruction::Or:
1281+
case Instruction::Xor:
1282+
// Sub is handled separately above (it additionally requires an exact lshr).
1283+
return true;
1284+
}
1285+
};
1286+
1287+
// If the shift is nuw and the binop is either nuw or cannot wrap (and/or/xor), then:
1288+
// ((X << nuw Z) binop nuw Y) >>u Z --> X binop nuw (Y >>u Z)
1289+
if (match(Op0, m_OneUse(m_c_BinOp(m_NUWShl(m_Value(X), m_Specific(Op1)),
1290+
m_Value(Y))))) {
1291+
BinaryOperator *Op0OB = cast<BinaryOperator>(Op0);
1292+
if (isSuitableBinOpcode(Op0OB->getOpcode())) {
1293+
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op0);
1294+
!OBO || OBO->hasNoUnsignedWrap()) {
1295+
Value *NewLshr = Builder.CreateLShr(
1296+
Y, Op1, "", I.isExact() && Op0OB->getOpcode() != Instruction::And);
1297+
auto *NewBinOp = BinaryOperator::Create(Op0OB->getOpcode(), NewLshr, X);
1298+
if (OBO) {
1299+
NewBinOp->setHasNoUnsignedWrap(true);
1300+
NewBinOp->setHasNoSignedWrap(OBO->hasNoSignedWrap());
1301+
} else if (auto *Disjoint = dyn_cast<PossiblyDisjointInst>(Op0)) {
1302+
cast<PossiblyDisjointInst>(NewBinOp)->setIsDisjoint(
1303+
Disjoint->isDisjoint());
1304+
}
1305+
return NewBinOp;
1306+
}
1307+
}
1308+
}
1309+
12621310
if (match(Op1, m_APInt(C))) {
12631311
unsigned ShAmtC = C->getZExtValue();
12641312
auto *II = dyn_cast<IntrinsicInst>(Op0);
@@ -1275,7 +1323,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
12751323
return new ZExtInst(Cmp, Ty);
12761324
}
12771325

1278-
Value *X;
12791326
const APInt *C1;
12801327
if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
12811328
if (C1->ult(ShAmtC)) {
@@ -1320,7 +1367,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
13201367
// ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
13211368
// TODO: Consolidate with the more general transform that starts from shl
13221369
// (the shifts are in the opposite order).
1323-
Value *Y;
13241370
if (match(Op0,
13251371
m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
13261372
m_Value(Y))))) {

llvm/test/Transforms/InstCombine/lshr.ll

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,17 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
163163
ret <2 x i8> %lshr
164164
}
165165

166+
define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) {
167+
; CHECK-LABEL: @lshr_exact_splat_vec_nuw(
168+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 1, i8 1>
169+
; CHECK-NEXT: ret <2 x i8> [[LSHR]]
170+
;
171+
%shl = shl nuw <2 x i8> %x, <i8 2, i8 2>
172+
%add = add nuw <2 x i8> %shl, <i8 4, i8 4>
173+
%lshr = lshr <2 x i8> %add, <i8 2, i8 2>
174+
ret <2 x i8> %lshr
175+
}
176+
166177
define i8 @shl_add(i8 %x, i8 %y) {
167178
; CHECK-LABEL: @shl_add(
168179
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2
@@ -360,8 +371,222 @@ define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
360371
ret <3 x i14> %t
361372
}
362373

374+
define i32 @shl_add_lshr_flag_preservation(i32 %x, i32 %c, i32 %y) {
375+
; CHECK-LABEL: @shl_add_lshr_flag_preservation(
376+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]]
377+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw nsw i32 [[TMP1]], [[X:%.*]]
378+
; CHECK-NEXT: ret i32 [[LSHR]]
379+
;
380+
%shl = shl nuw i32 %x, %c
381+
%add = add nuw nsw i32 %shl, %y
382+
%lshr = lshr exact i32 %add, %c
383+
ret i32 %lshr
384+
}
385+
386+
define i32 @shl_add_lshr(i32 %x, i32 %c, i32 %y) {
387+
; CHECK-LABEL: @shl_add_lshr(
388+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
389+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]]
390+
; CHECK-NEXT: ret i32 [[LSHR]]
391+
;
392+
%shl = shl nuw i32 %x, %c
393+
%add = add nuw i32 %shl, %y
394+
%lshr = lshr i32 %add, %c
395+
ret i32 %lshr
396+
}
397+
398+
define i32 @shl_add_lshr_comm(i32 %x, i32 %c, i32 %y) {
399+
; CHECK-LABEL: @shl_add_lshr_comm(
400+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
401+
; CHECK-NEXT: [[LSHR:%.*]] = add nuw i32 [[TMP1]], [[X:%.*]]
402+
; CHECK-NEXT: ret i32 [[LSHR]]
403+
;
404+
%shl = shl nuw i32 %x, %c
405+
%add = add nuw i32 %y, %shl
406+
%lshr = lshr i32 %add, %c
407+
ret i32 %lshr
408+
}
409+
363410
; Negative test
364411

412+
define i32 @shl_add_lshr_no_nuw(i32 %x, i32 %c, i32 %y) {
413+
; CHECK-LABEL: @shl_add_lshr_no_nuw(
414+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
415+
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SHL]], [[Y:%.*]]
416+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ADD]], [[C]]
417+
; CHECK-NEXT: ret i32 [[LSHR]]
418+
;
419+
%shl = shl nuw i32 %x, %c
420+
%add = add i32 %shl, %y
421+
%lshr = lshr i32 %add, %c
422+
ret i32 %lshr
423+
}
424+
425+
; Negative test
426+
427+
define i32 @shl_sub_lshr_not_exact(i32 %x, i32 %c, i32 %y) {
428+
; CHECK-LABEL: @shl_sub_lshr_not_exact(
429+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[C:%.*]]
430+
; CHECK-NEXT: [[SUB:%.*]] = sub nuw i32 [[SHL]], [[Y:%.*]]
431+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[SUB]], [[C]]
432+
; CHECK-NEXT: ret i32 [[LSHR]]
433+
;
434+
%shl = shl nuw i32 %x, %c
435+
%sub = sub nuw i32 %shl, %y
436+
%lshr = lshr i32 %sub, %c
437+
ret i32 %lshr
438+
}
439+
440+
; Negative test
441+
442+
define i32 @shl_sub_lshr_no_nuw(i32 %x, i32 %c, i32 %y) {
443+
; CHECK-LABEL: @shl_sub_lshr_no_nuw(
444+
; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[X:%.*]], [[C:%.*]]
445+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[SHL]], [[Y:%.*]]
446+
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i32 [[SUB]], [[C]]
447+
; CHECK-NEXT: ret i32 [[LSHR]]
448+
;
449+
%shl = shl nsw i32 %x, %c
450+
%sub = sub nsw i32 %shl, %y
451+
%lshr = lshr exact i32 %sub, %c
452+
ret i32 %lshr
453+
}
454+
455+
define i32 @shl_sub_lshr(i32 %x, i32 %c, i32 %y) {
456+
; CHECK-LABEL: @shl_sub_lshr(
457+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[Y:%.*]], [[C:%.*]]
458+
; CHECK-NEXT: [[LSHR:%.*]] = sub nuw nsw i32 [[X:%.*]], [[TMP1]]
459+
; CHECK-NEXT: ret i32 [[LSHR]]
460+
;
461+
%shl = shl nuw i32 %x, %c
462+
%sub = sub nuw nsw i32 %shl, %y
463+
%lshr = lshr exact i32 %sub, %c
464+
ret i32 %lshr
465+
}
466+
467+
define i32 @shl_or_lshr(i32 %x, i32 %c, i32 %y) {
468+
; CHECK-LABEL: @shl_or_lshr(
469+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
470+
; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]]
471+
; CHECK-NEXT: ret i32 [[LSHR]]
472+
;
473+
%shl = shl nuw i32 %x, %c
474+
%or = or i32 %shl, %y
475+
%lshr = lshr i32 %or, %c
476+
ret i32 %lshr
477+
}
478+
479+
define i32 @shl_or_disjoint_lshr(i32 %x, i32 %c, i32 %y) {
480+
; CHECK-LABEL: @shl_or_disjoint_lshr(
481+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
482+
; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]]
483+
; CHECK-NEXT: ret i32 [[LSHR]]
484+
;
485+
%shl = shl nuw i32 %x, %c
486+
%or = or disjoint i32 %shl, %y
487+
%lshr = lshr i32 %or, %c
488+
ret i32 %lshr
489+
}
490+
491+
define i32 @shl_or_lshr_comm(i32 %x, i32 %c, i32 %y) {
492+
; CHECK-LABEL: @shl_or_lshr_comm(
493+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
494+
; CHECK-NEXT: [[LSHR:%.*]] = or i32 [[TMP1]], [[X:%.*]]
495+
; CHECK-NEXT: ret i32 [[LSHR]]
496+
;
497+
%shl = shl nuw i32 %x, %c
498+
%or = or i32 %y, %shl
499+
%lshr = lshr i32 %or, %c
500+
ret i32 %lshr
501+
}
502+
503+
define i32 @shl_or_disjoint_lshr_comm(i32 %x, i32 %c, i32 %y) {
504+
; CHECK-LABEL: @shl_or_disjoint_lshr_comm(
505+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
506+
; CHECK-NEXT: [[LSHR:%.*]] = or disjoint i32 [[TMP1]], [[X:%.*]]
507+
; CHECK-NEXT: ret i32 [[LSHR]]
508+
;
509+
%shl = shl nuw i32 %x, %c
510+
%or = or disjoint i32 %y, %shl
511+
%lshr = lshr i32 %or, %c
512+
ret i32 %lshr
513+
}
514+
515+
define i32 @shl_xor_lshr(i32 %x, i32 %c, i32 %y) {
516+
; CHECK-LABEL: @shl_xor_lshr(
517+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
518+
; CHECK-NEXT: [[LSHR:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
519+
; CHECK-NEXT: ret i32 [[LSHR]]
520+
;
521+
%shl = shl nuw i32 %x, %c
522+
%xor = xor i32 %shl, %y
523+
%lshr = lshr i32 %xor, %c
524+
ret i32 %lshr
525+
}
526+
527+
define i32 @shl_xor_lshr_comm(i32 %x, i32 %c, i32 %y) {
528+
; CHECK-LABEL: @shl_xor_lshr_comm(
529+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
530+
; CHECK-NEXT: [[LSHR:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
531+
; CHECK-NEXT: ret i32 [[LSHR]]
532+
;
533+
%shl = shl nuw i32 %x, %c
534+
%xor = xor i32 %y, %shl
535+
%lshr = lshr i32 %xor, %c
536+
ret i32 %lshr
537+
}
538+
539+
define i32 @shl_and_lshr(i32 %x, i32 %c, i32 %y) {
540+
; CHECK-LABEL: @shl_and_lshr(
541+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
542+
; CHECK-NEXT: [[LSHR:%.*]] = and i32 [[TMP1]], [[X:%.*]]
543+
; CHECK-NEXT: ret i32 [[LSHR]]
544+
;
545+
%shl = shl nuw i32 %x, %c
546+
%and = and i32 %shl, %y
547+
%lshr = lshr i32 %and, %c
548+
ret i32 %lshr
549+
}
550+
551+
define i32 @shl_and_lshr_comm(i32 %x, i32 %c, i32 %y) {
552+
; CHECK-LABEL: @shl_and_lshr_comm(
553+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
554+
; CHECK-NEXT: [[LSHR:%.*]] = and i32 [[TMP1]], [[X:%.*]]
555+
; CHECK-NEXT: ret i32 [[LSHR]]
556+
;
557+
%shl = shl nuw i32 %x, %c
558+
%and = and i32 %y, %shl
559+
%lshr = lshr i32 %and, %c
560+
ret i32 %lshr
561+
}
562+
563+
define i32 @shl_lshr_and_exact(i32 %x, i32 %c, i32 %y) {
564+
; CHECK-LABEL: @shl_lshr_and_exact(
565+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[Y:%.*]], [[C:%.*]]
566+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X:%.*]]
567+
; CHECK-NEXT: ret i32 [[TMP2]]
568+
;
569+
%2 = shl nuw i32 %x, %c
570+
%3 = and i32 %2, %y
571+
%4 = lshr exact i32 %3, %c
572+
ret i32 %4
573+
}
574+
575+
; Negative test
576+
577+
define i32 @shl_add_lshr_neg(i32 %x, i32 %y, i32 %z) {
578+
; CHECK-LABEL: @shl_add_lshr_neg(
579+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[Y:%.*]]
580+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[SHL]], [[Z:%.*]]
581+
; CHECK-NEXT: [[RES:%.*]] = lshr exact i32 [[ADD]], [[Z]]
582+
; CHECK-NEXT: ret i32 [[RES]]
583+
;
584+
%shl = shl nuw i32 %x, %y
585+
%add = add nuw nsw i32 %shl, %z
586+
%res = lshr exact i32 %add, %z
587+
ret i32 %res
588+
}
589+
365590
define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
366591
; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
367592
; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
@@ -375,6 +600,21 @@ define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
375600

376601
; Negative test
377602

603+
define i32 @shl_add_lshr_multiuse(i32 %x, i32 %y, i32 %z) {
604+
; CHECK-LABEL: @shl_add_lshr_multiuse(
605+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 [[X:%.*]], [[Y:%.*]]
606+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[SHL]], [[Z:%.*]]
607+
; CHECK-NEXT: call void @use(i32 [[ADD]])
608+
; CHECK-NEXT: [[RES:%.*]] = lshr exact i32 [[ADD]], [[Z]]
609+
; CHECK-NEXT: ret i32 [[RES]]
610+
;
611+
%shl = shl nuw i32 %x, %y
612+
%add = add nuw nsw i32 %shl, %z
613+
call void @use (i32 %add)
614+
%res = lshr exact i32 %add, %z
615+
ret i32 %res
616+
}
617+
378618
define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
379619
; CHECK-LABEL: @mul_splat_fold_wrong_lshr_const(
380620
; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537

0 commit comments

Comments
 (0)