Skip to content

Commit fd0ffb7

Browse files
authored
[ValueTracking] Recognize LShr(UINT_MAX, Y) + 1 as a power-of-two (#91171)
There is a missed optimization in

``` llvm
define i8 @known_power_of_two_rust_next_power_of_two(i8 %x, i8 %y) {
  %2 = add i8 %x, -1
  %3 = tail call i8 @llvm.ctlz.i8(i8 %2, i1 true)
  %4 = lshr i8 -1, %3
  %5 = add i8 %4, 1
  %6 = icmp ugt i8 %x, 1
  %p = select i1 %6, i8 %5, i8 1
  %r = urem i8 %y, %p
  ret i8 %r
}
```

which is extracted from the Rust code

``` rust
fn func(x: usize, y: usize) -> usize {
    let z = x.next_power_of_two();
    y % z
}
```

Here `%p` (a.k.a. `z`) is semantically a power-of-two, so `y urem p` can be optimized to `y & (p - 1)`. (Alive2 proof: https://alive2.llvm.org/ce/z/H3zooY)

---

It could be generalized to recognizing `LShr(UINT_MAX, Y) + 1` as a power-of-two, which is what this PR does. Alive2 proof: https://alive2.llvm.org/ce/z/zUPTbc
1 parent f34d30c commit fd0ffb7

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2173,6 +2173,11 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
21732173
if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
21742174
return true;
21752175
}
2176+
2177+
// LShr(UINT_MAX, Y) + 1 is a power of two, or zero when the add wraps (Y == 0); nuw on the add rules out the zero case.
2178+
if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2179+
if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2180+
return true;
21762181
return false;
21772182
}
21782183
case Instruction::Select:

llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,3 +387,119 @@ for.end:
387387
%r = phi i64 [ %sum, %for.body ]
388388
ret i64 %r
389389
}
390+
391+
; https://alive2.llvm.org/ce/z/3QfEHm
392+
define i8 @known_power_of_two_rust_next_power_of_two(i8 %x, i8 %y) {
393+
; CHECK-LABEL: @known_power_of_two_rust_next_power_of_two(
394+
; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -1
395+
; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[TMP1]], i1 true)
396+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i8 -1, [[TMP2]]
397+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[X]], 1
398+
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i8 [[TMP3]], i8 0
399+
; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP5]], [[Y:%.*]]
400+
; CHECK-NEXT: ret i8 [[R]]
401+
;
402+
%2 = add i8 %x, -1
403+
%3 = tail call i8 @llvm.ctlz.i8(i8 %2, i1 true)
404+
%4 = lshr i8 -1, %3
405+
%5 = add i8 %4, 1
406+
%6 = icmp ugt i8 %x, 1
407+
%p = select i1 %6, i8 %5, i8 1
408+
; Rust's implementation of `%p = next_power_of_two(%x)`
409+
410+
%r = urem i8 %y, %p
411+
ret i8 %r
412+
}
413+
414+
define i8 @known_power_of_two_lshr_add_one_allow_zero(i8 %x, i8 %y) {
415+
; CHECK-LABEL: @known_power_of_two_lshr_add_one_allow_zero(
416+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]]
417+
; CHECK-NEXT: [[R:%.*]] = and i8 [[TMP1]], [[Y:%.*]]
418+
; CHECK-NEXT: ret i8 [[R]]
419+
;
420+
%4 = lshr i8 -1, %x
421+
%p = add i8 %4, 1
422+
423+
; Note: y % p --> y & (p - 1) allows p == 0
424+
%r = urem i8 %y, %p
425+
ret i8 %r
426+
}
427+
428+
define i1 @known_power_of_two_lshr_add_one_nuw_deny_zero(i8 %x, i8 %y) {
429+
; CHECK-LABEL: @known_power_of_two_lshr_add_one_nuw_deny_zero(
430+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]]
431+
; CHECK-NEXT: [[P:%.*]] = add nuw i8 [[TMP1]], 1
432+
; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]]
433+
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[AND]], 0
434+
; CHECK-NEXT: ret i1 [[R]]
435+
;
436+
%4 = lshr i8 -1, %x
437+
%p = add nuw i8 %4, 1
438+
439+
; Note: A & B_Pow2 != B_Pow2 --> A & B_Pow2 == 0 requires B_Pow2 != 0
440+
%and = and i8 %p, %y
441+
%r = icmp ne i8 %and, %p
442+
ret i1 %r
443+
}
444+
445+
define i1 @negative_known_power_of_two_lshr_add_one_deny_zero(i8 %x, i8 %y) {
446+
; CHECK-LABEL: @negative_known_power_of_two_lshr_add_one_deny_zero(
447+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]]
448+
; CHECK-NEXT: [[P:%.*]] = add i8 [[TMP1]], 1
449+
; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]]
450+
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[P]]
451+
; CHECK-NEXT: ret i1 [[R]]
452+
;
453+
%4 = lshr i8 -1, %x
454+
%p = add i8 %4, 1
455+
456+
; Note: A & B_Pow2 != B_Pow2 --> A & B_Pow2 == 0 requires B_Pow2 != 0
457+
%and = and i8 %p, %y
458+
%r = icmp ne i8 %and, %p
459+
ret i1 %r
460+
}
461+
462+
define i1 @negative_known_power_of_two_lshr_add_one_nsw_deny_zero(i8 %x, i8 %y) {
463+
; CHECK-LABEL: @negative_known_power_of_two_lshr_add_one_nsw_deny_zero(
464+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]]
465+
; CHECK-NEXT: [[P:%.*]] = add nsw i8 [[TMP1]], 1
466+
; CHECK-NEXT: [[AND:%.*]] = and i8 [[P]], [[Y:%.*]]
467+
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[P]]
468+
; CHECK-NEXT: ret i1 [[R]]
469+
;
470+
%4 = lshr i8 -1, %x
471+
%p = add nsw i8 %4, 1
472+
473+
; Note: A & B_Pow2 != B_Pow2 --> A & B_Pow2 == 0 requires B_Pow2 != 0
474+
%and = and i8 %p, %y
475+
%r = icmp ne i8 %and, %p
476+
ret i1 %r
477+
}
478+
479+
define i8 @known_power_of_two_lshr_add_negative_1(i8 %x, i8 %y) {
480+
; CHECK-LABEL: @known_power_of_two_lshr_add_negative_1(
481+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -2, [[X:%.*]]
482+
; CHECK-NEXT: [[P:%.*]] = add nuw i8 [[TMP1]], 1
483+
; CHECK-NEXT: [[R:%.*]] = urem i8 [[Y:%.*]], [[P]]
484+
; CHECK-NEXT: ret i8 [[R]]
485+
;
486+
%4 = lshr i8 -2, %x
487+
%p = add i8 %4, 1
488+
489+
%r = urem i8 %y, %p
490+
ret i8 %r
491+
}
492+
493+
define i8 @known_power_of_two_lshr_add_negative_2(i8 %x, i8 %y) {
494+
; CHECK-LABEL: @known_power_of_two_lshr_add_negative_2(
495+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 -1, [[X:%.*]]
496+
; CHECK-NEXT: [[P:%.*]] = add nsw i8 [[TMP1]], -1
497+
; CHECK-NEXT: [[R:%.*]] = urem i8 [[Y:%.*]], [[P]]
498+
; CHECK-NEXT: ret i8 [[R]]
499+
;
500+
%4 = lshr i8 -1, %x
501+
%p = add i8 %4, -1
502+
503+
%r = urem i8 %y, %p
504+
ret i8 %r
505+
}

0 commit comments

Comments
 (0)