Skip to content

Commit 7e7cb7e

Browse files
committed
Merged main:077e1b892d95 into amd-gfx:1d4c3cc2fa8e
Local branch amd-gfx 1d4c3cc Merged main:503bc5f66111 into amd-gfx:2a4e1350d643 Remote branch main 077e1b8 [clang] Preserve UDL nodes in RemoveNestedImmediateInvocation (llvm#66641)
2 parents 1d4c3cc + 077e1b8 commit 7e7cb7e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+4213
-3933
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,8 @@ Bug Fixes in This Version
325325
(`#67722 <https://github.com/llvm/llvm-project/issues/67722>`_).
326326
- Fixes a crash when instantiating a lambda with a requires clause.
327327
(`#64462 <https://github.com/llvm/llvm-project/issues/64462>`_)
328+
- Fixes a regression where the ``UserDefinedLiteral`` node was not properly preserved
329+
while evaluating consteval functions. (`#63898 <https://github.com/llvm/llvm-project/issues/63898>`_).
328330

329331
Bug Fixes to Compiler Builtins
330332
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

clang/lib/Sema/SemaExpr.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18468,7 +18468,10 @@ static void RemoveNestedImmediateInvocation(
1846818468
DRSet.erase(cast<DeclRefExpr>(E->getCallee()->IgnoreImplicit()));
1846918469
return Base::TransformCXXOperatorCallExpr(E);
1847018470
}
18471-
/// Base::TransformInitializer skip ConstantExpr so we need to visit them
18471+
/// Base::TransformUserDefinedLiteral doesn't preserve the
18472+
/// UserDefinedLiteral node.
18473+
ExprResult TransformUserDefinedLiteral(UserDefinedLiteral *E) { return E; }
18474+
/// Base::TransformInitializer skips ConstantExpr so we need to visit them
1847218475
/// here.
1847318476
ExprResult TransformInitializer(Expr *Init, bool NotCopyInit) {
1847418477
if (!Init)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %clang_cc1 -xc++ -std=c++23 -ast-dump %s | FileCheck %s
2+
3+
int inline consteval operator""_u32(unsigned long long val) {
4+
return val;
5+
}
6+
7+
void udl() {
8+
(void)(0_u32 + 1_u32);
9+
}
10+
11+
// CHECK: `-BinaryOperator {{.+}} <col:10, col:18> 'int' '+'
12+
// CHECK-NEXT: |-ConstantExpr {{.+}} <col:10> 'int'
13+
// CHECK-NEXT: | |-value: Int 0
14+
// CHECK-NEXT: | `-UserDefinedLiteral {{.+}} <col:10> 'int'
15+
// CHECK: `-ConstantExpr {{.+}} <col:18> 'int'
16+
// CHECK-NEXT: |-value: Int 1
17+
// CHECK-NEXT: `-UserDefinedLiteral {{.+}} <col:18> 'int'

libcxx/include/__algorithm/unwrap_range.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ struct __unwrap_range_impl {
5050
}
5151

5252
_LIBCPP_HIDE_FROM_ABI static constexpr auto __rewrap(const _Iter&, _Iter __iter)
53-
requires (!(random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>))
53+
requires(!(random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>))
5454
{
5555
return __iter;
5656
}
@@ -73,10 +73,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __unwrap_range(_Iter __first, _Sent __last)
7373
return __unwrap_range_impl<_Iter, _Sent>::__unwrap(std::move(__first), std::move(__last));
7474
}
7575

76-
template <
77-
class _Sent,
78-
class _Iter,
79-
class _Unwrapped = decltype(std::__unwrap_range(std::declval<_Iter>(), std::declval<_Sent>()))>
76+
template < class _Sent, class _Iter, class _Unwrapped>
8077
_LIBCPP_HIDE_FROM_ABI constexpr _Iter __rewrap_range(_Iter __orig_iter, _Unwrapped __iter) {
8178
return __unwrap_range_impl<_Iter, _Sent>::__rewrap(std::move(__orig_iter), std::move(__iter));
8279
}
@@ -86,7 +83,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR pair<_Unwrapped, _Unwrapped> __unwrap_ra
8683
return std::make_pair(std::__unwrap_iter(std::move(__first)), std::__unwrap_iter(std::move(__last)));
8784
}
8885

89-
template <class _Iter, class _Unwrapped = decltype(std::__unwrap_iter(std::declval<_Iter>()))>
86+
template <class _Iter, class _Unwrapped>
9087
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap_range(_Iter __orig_iter, _Unwrapped __iter) {
9188
return std::__rewrap_iter(std::move(__orig_iter), std::move(__iter));
9289
}

libcxx/utils/data/ignore_format.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ libcxx/include/__algorithm/swap_ranges.h
8484
libcxx/include/__algorithm/transform.h
8585
libcxx/include/__algorithm/uniform_random_bit_generator_adaptor.h
8686
libcxx/include/__algorithm/unwrap_iter.h
87-
libcxx/include/__algorithm/unwrap_range.h
8887
libcxx/include/any
8988
libcxx/include/array
9089
libcxx/include/__atomic/atomic_base.h

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,10 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
528528
continue;
529529
MachineInstr *NewDbgMI = SinkDst->getMF()->CloneMachineInstr(DbgMI);
530530
SinkMBB.insertAfter(InsertPt, NewDbgMI);
531-
NewDbgMI->getOperand(0).setReg(DstReg);
531+
for (auto &SrcMO : DbgMI->getDebugOperandsForReg(DefReg)) {
532+
auto &DstMO = NewDbgMI->getOperand(SrcMO.getOperandNo());
533+
DstMO.setReg(DstReg);
534+
}
532535
}
533536
} else {
534537
// Fold instruction into the addressing mode of a memory instruction.

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,6 +1784,38 @@ bool TargetLowering::SimplifyDemandedBits(
17841784
}
17851785
}
17861786

1787+
// Narrow shift to lower half - similar to ShrinkDemandedOp.
1788+
// (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
1789+
unsigned HalfWidth = BitWidth / 2;
1790+
if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth) {
1791+
EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
1792+
if (isNarrowingProfitable(VT, HalfVT) &&
1793+
isTypeDesirableForOp(ISD::SHL, HalfVT) &&
1794+
isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
1795+
(!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT))) {
1796+
// If any of the upper bits are demanded, we must ensure
1797+
// that the upper bits of the shift result are known to be zero,
1798+
// which is equivalent to the narrow shift being NUW.
1799+
KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, Depth + 1);
1800+
bool IsNUW = Known0.countMinLeadingZeros() >= (ShAmt + HalfWidth);
1801+
if (IsNUW || DemandedBits.countLeadingZeros() >= HalfWidth) {
1802+
unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0, Depth + 1);
1803+
bool IsNSW = NumSignBits > (ShAmt + HalfWidth);
1804+
SDNodeFlags Flags;
1805+
Flags.setNoSignedWrap(IsNSW);
1806+
Flags.setNoUnsignedWrap(IsNUW);
1807+
SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
1808+
SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
1809+
ShAmt, HalfVT, dl, TLO.LegalTypes());
1810+
SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
1811+
NewShiftAmt, Flags);
1812+
SDValue NewExt =
1813+
TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
1814+
return TLO.CombineTo(Op, NewExt);
1815+
}
1816+
}
1817+
}
1818+
17871819
APInt InDemandedMask = DemandedBits.lshr(ShAmt);
17881820
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
17891821
Depth + 1))

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -996,8 +996,8 @@ static bool isDeadPHICycle(PHINode *PN,
996996
/// Return true if this phi node is always equal to NonPhiInVal.
997997
/// This happens with mutually cyclic phi nodes like:
998998
/// z = some value; x = phi (y, z); y = phi (x, z)
999-
static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
1000-
SmallPtrSetImpl<PHINode*> &ValueEqualPHIs) {
999+
static bool PHIsEqualValue(PHINode *PN, Value *&NonPhiInVal,
1000+
SmallPtrSetImpl<PHINode *> &ValueEqualPHIs) {
10011001
// See if we already saw this PHI node.
10021002
if (!ValueEqualPHIs.insert(PN).second)
10031003
return true;
@@ -1010,8 +1010,11 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
10101010
// the value.
10111011
for (Value *Op : PN->incoming_values()) {
10121012
if (PHINode *OpPN = dyn_cast<PHINode>(Op)) {
1013-
if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs))
1014-
return false;
1013+
if (!PHIsEqualValue(OpPN, NonPhiInVal, ValueEqualPHIs)) {
1014+
if (NonPhiInVal)
1015+
return false;
1016+
NonPhiInVal = OpPN;
1017+
}
10151018
} else if (Op != NonPhiInVal)
10161019
return false;
10171020
}
@@ -1478,33 +1481,35 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
14781481
// z = some value; x = phi (y, z); y = phi (x, z)
14791482
// where the phi nodes don't necessarily need to be in the same block. Do a
14801483
// quick check to see if the PHI node only contains a single non-phi value, if
1481-
// so, scan to see if the phi cycle is actually equal to that value.
1484+
// so, scan to see if the phi cycle is actually equal to that value. If the
1485+
// phi has no non-phi values then allow the "NonPhiInVal" to be set later if
1486+
// one of the phis itself does not have a single input.
14821487
{
14831488
unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues();
14841489
// Scan for the first non-phi operand.
14851490
while (InValNo != NumIncomingVals &&
14861491
isa<PHINode>(PN.getIncomingValue(InValNo)))
14871492
++InValNo;
14881493

1489-
if (InValNo != NumIncomingVals) {
1490-
Value *NonPhiInVal = PN.getIncomingValue(InValNo);
1494+
Value *NonPhiInVal =
1495+
InValNo != NumIncomingVals ? PN.getIncomingValue(InValNo) : nullptr;
14911496

1492-
// Scan the rest of the operands to see if there are any conflicts, if so
1493-
// there is no need to recursively scan other phis.
1497+
// Scan the rest of the operands to see if there are any conflicts, if so
1498+
// there is no need to recursively scan other phis.
1499+
if (NonPhiInVal)
14941500
for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
14951501
Value *OpVal = PN.getIncomingValue(InValNo);
14961502
if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
14971503
break;
14981504
}
14991505

1500-
// If we scanned over all operands, then we have one unique value plus
1501-
// phi values. Scan PHI nodes to see if they all merge in each other or
1502-
// the value.
1503-
if (InValNo == NumIncomingVals) {
1504-
SmallPtrSet<PHINode*, 16> ValueEqualPHIs;
1505-
if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
1506-
return replaceInstUsesWith(PN, NonPhiInVal);
1507-
}
1506+
// If we scanned over all operands, then we have one unique value plus
1507+
// phi values. Scan PHI nodes to see if they all merge in each other or
1508+
// the value.
1509+
if (InValNo == NumIncomingVals) {
1510+
SmallPtrSet<PHINode *, 16> ValueEqualPHIs;
1511+
if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs))
1512+
return replaceInstUsesWith(PN, NonPhiInVal);
15081513
}
15091514
}
15101515

llvm/test/CodeGen/AArch64/sink-and-fold-dbg-value-crash.mir

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,16 @@
2626
ret void
2727
}
2828

29-
declare ptr @g(ptr)
29+
define ptr @g(ptr) {
30+
entry:
31+
br label %if.then
32+
if.then:
33+
br label %if.end
34+
if.end:
35+
br label %exit
36+
exit:
37+
ret ptr null
38+
}
3039

3140
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
3241

@@ -170,3 +179,126 @@ body: |
170179
RET_ReallyLR
171180
172181
...
182+
---
183+
name: g
184+
alignment: 4
185+
exposesReturnsTwice: false
186+
legalized: false
187+
regBankSelected: false
188+
selected: false
189+
failedISel: false
190+
tracksRegLiveness: true
191+
hasWinCFI: false
192+
callsEHReturn: false
193+
callsUnwindInit: false
194+
hasEHCatchret: false
195+
hasEHScopes: false
196+
hasEHFunclets: false
197+
isOutlined: false
198+
debugInstrRef: false
199+
failsVerification: false
200+
tracksDebugUserValues: false
201+
registers:
202+
- { id: 0, class: gpr64all, preferred-register: '' }
203+
- { id: 1, class: gpr64common, preferred-register: '' }
204+
- { id: 2, class: gpr32, preferred-register: '' }
205+
- { id: 3, class: gpr32, preferred-register: '' }
206+
- { id: 4, class: gpr32, preferred-register: '' }
207+
- { id: 5, class: gpr64sp, preferred-register: '' }
208+
- { id: 6, class: gpr64all, preferred-register: '' }
209+
liveins:
210+
- { reg: '$x0', virtual-reg: '%1' }
211+
- { reg: '$w1', virtual-reg: '%2' }
212+
- { reg: '$w2', virtual-reg: '%3' }
213+
frameInfo:
214+
isFrameAddressTaken: false
215+
isReturnAddressTaken: false
216+
hasStackMap: false
217+
hasPatchPoint: false
218+
stackSize: 0
219+
offsetAdjustment: 0
220+
maxAlignment: 1
221+
adjustsStack: true
222+
hasCalls: true
223+
stackProtector: ''
224+
functionContext: ''
225+
maxCallFrameSize: 0
226+
cvBytesOfCalleeSavedRegisters: 0
227+
hasOpaqueSPAdjustment: false
228+
hasVAStart: false
229+
hasMustTailInVarArgFunc: false
230+
hasTailCall: false
231+
localFrameSize: 0
232+
savePoint: ''
233+
restorePoint: ''
234+
fixedStack: []
235+
stack: []
236+
entry_values: []
237+
callSites: []
238+
debugValueSubstitutions: []
239+
constants: []
240+
machineFunctionInfo: {}
241+
body: |
242+
; CHECK-LABEL: name: g
243+
; CHECK: bb.0.entry:
244+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
245+
; CHECK-NEXT: liveins: $x0, $w1, $w2
246+
; CHECK-NEXT: {{ $}}
247+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w2
248+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
249+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
250+
; CHECK-NEXT: DBG_VALUE_LIST !4, !DIExpression(), [[COPY]], $noreg, debug-location !10
251+
; CHECK-NEXT: TBZW [[COPY1]], 0, %bb.2
252+
; CHECK-NEXT: B %bb.1
253+
; CHECK-NEXT: {{ $}}
254+
; CHECK-NEXT: bb.1.if.then:
255+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
256+
; CHECK-NEXT: {{ $}}
257+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY [[COPY]]
258+
; CHECK-NEXT: DBG_VALUE_LIST !4, !DIExpression(), [[COPY3]], $noreg, debug-location !10
259+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
260+
; CHECK-NEXT: $x0 = ADDXri [[COPY2]], 8, 0
261+
; CHECK-NEXT: DBG_VALUE_LIST !4, !DIExpression(), [[COPY3]], $x0, debug-location !10
262+
; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
263+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
264+
; CHECK-NEXT: TBNZW [[COPY3]], 0, %bb.3
265+
; CHECK-NEXT: B %bb.2
266+
; CHECK-NEXT: {{ $}}
267+
; CHECK-NEXT: bb.2.if.end:
268+
; CHECK-NEXT: successors: %bb.3(0x80000000)
269+
; CHECK-NEXT: {{ $}}
270+
; CHECK-NEXT: {{ $}}
271+
; CHECK-NEXT: bb.3.exit:
272+
; CHECK-NEXT: RET_ReallyLR
273+
bb.0.entry:
274+
successors: %bb.1(0x40000000), %bb.2(0x40000000)
275+
liveins: $x0, $w1, $w2
276+
277+
%3:gpr32 = COPY $w2
278+
%2:gpr32 = COPY $w1
279+
%1:gpr64common = COPY $x0
280+
%4:gpr32 = COPY %3
281+
%5:gpr64sp = ADDXri %1, 8, 0
282+
DBG_VALUE_LIST !4, !DIExpression(), %4:gpr32, %5:gpr64sp, debug-location !10
283+
%0:gpr64all = COPY %5
284+
TBZW %2, 0, %bb.2
285+
B %bb.1
286+
287+
bb.1.if.then:
288+
successors: %bb.3(0x40000000), %bb.2(0x40000000)
289+
290+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
291+
$x0 = COPY %0
292+
BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0
293+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
294+
TBNZW %4, 0, %bb.3
295+
B %bb.2
296+
297+
bb.2.if.end:
298+
successors: %bb.3(0x80000000)
299+
300+
301+
bb.3.exit:
302+
RET_ReallyLR
303+
304+
...

llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %res
3232
; the base may be the RHS operand of the load in SDAG.
3333
; GCN-LABEL: name: test_complex_reg_offset
3434
; GCN-DAG: %[[BASE:.*]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @0 + 4,
35-
; GCN-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
35+
; SDAG-DAG: %[[OFFSET:.*]]:sreg_32 = nuw nsw S_LSHL_B32
36+
; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = S_LSHL_B32
3637
; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], killed %[[OFFSET]], 0, 0
3738
; GISEL: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 0, 0
3839
define amdgpu_ps void @test_complex_reg_offset(ptr addrspace(1) %out) {

0 commit comments

Comments
 (0)