Skip to content

Commit 757aa7b

Browse files
SC llvm team
SC llvm team
authored and
SC llvm team
committed
Merged main:533e6bbd0d344a710c491a9eb0ce0ba0852b08cb into amd-gfx:df9ba6cae58e
Local branch amd-gfx (df9ba6c): "Merged main:7e6b1504c7cf6976ac8e9012c4513ffa258bd8eb into amd-gfx:c4f35426c150". Remote branch main (533e6bb): "[VPlan] Simplify live-ins if they are SCEVConstant."
2 parents df9ba6c + 533e6bb commit 757aa7b

File tree

13 files changed

+162
-51
lines changed

13 files changed

+162
-51
lines changed

clang/lib/Sema/SemaConcept.cpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -977,8 +977,30 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction(
977977
// equivalence.
978978
LocalInstantiationScope ScopeForParameters(S);
979979
if (auto *FD = DeclInfo.getDecl()->getAsFunction())
980-
for (auto *PVD : FD->parameters())
981-
ScopeForParameters.InstantiatedLocal(PVD, PVD);
980+
for (auto *PVD : FD->parameters()) {
981+
if (!PVD->isParameterPack()) {
982+
ScopeForParameters.InstantiatedLocal(PVD, PVD);
983+
continue;
984+
}
985+
// This is hacky: we're mapping the parameter pack to a size-of-1 argument
986+
// to avoid building SubstTemplateTypeParmPackTypes for
987+
// PackExpansionTypes. The SubstTemplateTypeParmPackType node would
988+
// otherwise reference the AssociatedDecl of the template arguments, which
989+
// is, in this case, the template declaration.
990+
//
991+
// However, as we are in the process of comparing potential
992+
// re-declarations, the canonical declaration is the declaration itself at
993+
// this point. So if we didn't expand these packs, we would end up with an
994+
// incorrect profile difference because we will be profiling the
995+
// canonical types!
996+
//
997+
// FIXME: Improve the "no-transform" machinery in FindInstantiatedDecl so
998+
// that we can eliminate the Scope in the cases where the declarations are
999+
// not necessarily instantiated. It would also benefit the noexcept
1000+
// specifier comparison.
1001+
ScopeForParameters.MakeInstantiatedLocalArgPack(PVD);
1002+
ScopeForParameters.InstantiatedLocalPackArg(PVD, PVD);
1003+
}
9821004

9831005
std::optional<Sema::CXXThisScopeRAII> ThisScope;
9841006

clang/test/SemaTemplate/concepts-out-of-line-def.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,3 +599,26 @@ template <class DerT>
599599
unsigned long DerivedCollection<DerTs...>::index() {}
600600

601601
} // namespace GH72557
602+
603+
namespace GH101735 {
604+
605+
template <class, class>
606+
concept True = true;
607+
608+
template <typename T>
609+
class A {
610+
template <typename... Ts>
611+
void method(Ts&... ts)
612+
requires requires (T t) {
613+
{ t.method(static_cast<Ts &&>(ts)...) } -> True<void>;
614+
};
615+
};
616+
617+
template <typename T>
618+
template <typename... Ts>
619+
void A<T>::method(Ts&... ts)
620+
requires requires (T t) {
621+
{ t.method(static_cast<Ts &&>(ts)...) } -> True<void>;
622+
} {}
623+
624+
}

compiler-rt/lib/nsan/nsan.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ int32_t checkFT(const FT value, ShadowFT Shadow, CheckTypeT CheckType,
446446
const InternalFT check_shadow = Shadow;
447447

448448
// We only check for NaNs in the value, not the shadow.
449-
if (flags().check_nan && isnan(check_value)) {
449+
if (flags().check_nan && isnan(value)) {
450450
GET_CALLER_PC_BP;
451451
BufferedStackTrace stack;
452452
stack.Unwind(pc, bp, nullptr, false);

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 509579
19+
#define LLVM_MAIN_REVISION 509586
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ add_llvm_target(AMDGPUCodeGen
190190
HipStdPar
191191
IPO
192192
IRPrinter
193+
Instrumentation
193194
MC
194195
MIRParser
195196
Passes

llvm/lib/Transforms/IPO/SCCP.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,10 @@ static bool runIPSCCP(
289289
if (ReturnValue.isConstantRangeIncludingUndef())
290290
continue;
291291

292-
// Do not touch existing attribute for now.
293-
// TODO: We should be able to take the intersection of the existing
294-
// attribute and the inferred range.
292+
// Take the intersection of the existing attribute and the inferred range.
293+
ConstantRange CR = ReturnValue.getConstantRange();
295294
if (F->hasRetAttribute(Attribute::Range))
296-
continue;
297-
auto &CR = ReturnValue.getConstantRange();
295+
CR = CR.intersectWith(F->getRetAttribute(Attribute::Range).getRange());
298296
F->addRangeRetAttr(CR);
299297
continue;
300298
}

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,10 @@ static std::optional<std::pair<unsigned, unsigned>> getMaskedTypeForICmpPair(
270270
E = R2;
271271
Ok = true;
272272
}
273+
274+
// Avoid matching against the -1 value we created for unmasked operand.
275+
if (Ok && match(A, m_AllOnes()))
276+
Ok = false;
273277
}
274278

275279
// Bail if RHS was a icmp that can't be decomposed into an equality.

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,21 +2060,10 @@ static bool canSinkInstructions(
20602060
return I->getOperand(OI) == I0->getOperand(OI);
20612061
};
20622062
if (!all_of(Insts, SameAsI0)) {
2063-
// Because SROA historically couldn't handle speculating stores of
2064-
// selects, we try not to sink loads, stores or lifetime markers of
2065-
// allocas when we'd have to create a PHI for the address operand.
2066-
// TODO: SROA supports speculation for loads and stores now -- remove
2067-
// this hack?
2068-
if (isa<StoreInst>(I0) && OI == 1 &&
2069-
any_of(Insts, [](const Instruction *I) {
2070-
return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2071-
}))
2072-
return false;
2073-
if (isa<LoadInst>(I0) && OI == 0 &&
2074-
any_of(Insts, [](const Instruction *I) {
2075-
return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2076-
}))
2077-
return false;
2063+
// SROA can't speculate lifetime markers of selects/phis, and the
2064+
// backend may handle such lifetimes incorrectly as well (#104776).
2065+
// Don't sink lifetimes if it would introduce a phi on the pointer
2066+
// argument.
20782067
if (isLifeTimeMarker(I0) && OI == 1 &&
20792068
any_of(Insts, [](const Instruction *I) {
20802069
return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "VPlan.h"
1414
#include "llvm/ADT/DenseMap.h"
1515
#include "llvm/ADT/PointerUnion.h"
16+
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
1617
#include "llvm/IR/IRBuilder.h"
1718

1819
namespace llvm {
@@ -173,6 +174,11 @@ class VPRecipeBuilder {
173174
if (auto *R = Ingredient2Recipe.lookup(I))
174175
return R->getVPSingleValue();
175176
}
177+
ScalarEvolution &SE = *PSE.getSE();
178+
if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))
179+
if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))
180+
return Plan.getOrAddLiveIn(C->getValue());
181+
176182
return Plan.getOrAddLiveIn(V);
177183
}
178184
};

llvm/test/Transforms/InstCombine/bit-checks.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -809,12 +809,10 @@ define i32 @main7a_logical(i32 %argc, i32 %argc2, i32 %argc3) {
809809
define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3x) {
810810
; CHECK-LABEL: @main7b(
811811
; CHECK-NEXT: [[ARGC3:%.*]] = mul i32 [[ARGC3X:%.*]], 42
812-
; CHECK-NEXT: [[AND1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]]
813-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[ARGC2]], [[AND1]]
814-
; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3]]
815-
; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[ARGC3]], [[AND2]]
816-
; CHECK-NEXT: [[AND_COND_NOT:%.*]] = or i1 [[TOBOOL]], [[TOBOOL3]]
817-
; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32
812+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3]]
813+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARGC:%.*]], [[TMP1]]
814+
; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
815+
; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32
818816
; CHECK-NEXT: ret i32 [[STOREMERGE]]
819817
;
820818
%argc3 = mul i32 %argc3x, 42 ; thwart complexity-based canonicalization
@@ -850,12 +848,10 @@ define i32 @main7b_logical(i32 %argc, i32 %argc2, i32 %argc3) {
850848
define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3x) {
851849
; CHECK-LABEL: @main7c(
852850
; CHECK-NEXT: [[ARGC3:%.*]] = mul i32 [[ARGC3X:%.*]], 42
853-
; CHECK-NEXT: [[AND1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC:%.*]]
854-
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[ARGC2]], [[AND1]]
855-
; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC3]], [[ARGC]]
856-
; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[ARGC3]], [[AND2]]
857-
; CHECK-NEXT: [[AND_COND_NOT:%.*]] = or i1 [[TOBOOL]], [[TOBOOL3]]
858-
; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32
851+
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3]]
852+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[ARGC:%.*]], [[TMP1]]
853+
; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
854+
; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32
859855
; CHECK-NEXT: ret i32 [[STOREMERGE]]
860856
;
861857
%argc3 = mul i32 %argc3x, 42 ; thwart complexity-based canonicalization

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,71 @@ exit:
931931
ret void
932932
}
933933

934+
; Test case for https://github.com/llvm/llvm-project/issues/105722.
935+
define i64 @live_in_known_1_via_scev() {
936+
; CHECK-LABEL: @live_in_known_1_via_scev(
937+
; CHECK-NEXT: entry:
938+
; CHECK-NEXT: [[SEL:%.*]] = select i1 false, i32 3, i32 0
939+
; CHECK-NEXT: br label [[PH:%.*]]
940+
; CHECK: ph:
941+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ]
942+
; CHECK-NEXT: [[N:%.*]] = add nuw nsw i32 [[SEL]], 6
943+
; CHECK-NEXT: [[P_EXT:%.*]] = zext nneg i32 [[P]] to i64
944+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
945+
; CHECK: vector.ph:
946+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
947+
; CHECK: vector.body:
948+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
949+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
950+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
951+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
952+
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
953+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], <i32 5, i32 5, i32 5, i32 5>
954+
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]]
955+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
956+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
957+
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
958+
; CHECK: middle.block:
959+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[TMP1]])
960+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
961+
; CHECK: scalar.ph:
962+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[PH]] ]
963+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 3, [[PH]] ]
964+
; CHECK-NEXT: br label [[LOOP:%.*]]
965+
; CHECK: loop:
966+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
967+
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RED_MUL:%.*]], [[LOOP]] ]
968+
; CHECK-NEXT: [[RED_MUL]] = mul nsw i64 [[RED]], [[P_EXT]]
969+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
970+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
971+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
972+
; CHECK: exit:
973+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
974+
; CHECK-NEXT: ret i64 [[RES]]
975+
;
976+
entry:
977+
%sel = select i1 false, i32 3, i32 0
978+
br label %ph
979+
980+
ph:
981+
%p = phi i32 [ 1, %entry ]
982+
%N = add nuw nsw i32 %sel, 6
983+
%p.ext = zext nneg i32 %p to i64
984+
br label %loop
985+
986+
loop:
987+
%iv = phi i32 [ 0, %ph ], [ %iv.next, %loop ]
988+
%red = phi i64 [ 3, %ph ], [ %red.mul, %loop ]
989+
%red.mul = mul nsw i64 %red, %p.ext
990+
%iv.next = add nuw nsw i32 %iv, 1
991+
%ec = icmp eq i32 %iv.next, %N
992+
br i1 %ec, label %exit, label %loop
993+
994+
exit:
995+
%res = phi i64 [ %red.mul, %loop ]
996+
ret i64 %res
997+
}
998+
934999
declare void @llvm.assume(i1 noundef) #0
9351000

9361001
attributes #0 = { "target-cpu"="penryn" }
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes=ipsccp -S | FileCheck %s
3+
4+
declare range(i32 0, 20) i32 @callee(i32)
5+
6+
define range(i32 10, 30) i32 @caller(i32 %x) {
7+
; CHECK-LABEL: define range(i32 10, 20) i32 @caller(
8+
; CHECK-SAME: i32 [[X:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[CALL:%.*]] = call range(i32 0, 20) i32 @callee()
11+
; CHECK-NEXT: ret i32 [[CALL]]
12+
;
13+
entry:
14+
%call = call range(i32 0, 20) i32 @callee()
15+
ret i32 %call
16+
}

llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -803,14 +803,8 @@ define i32 @test_pr30188(i1 zeroext %flag, i32 %x) {
803803
; CHECK-NEXT: entry:
804804
; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
805805
; CHECK-NEXT: [[Z:%.*]] = alloca i32, align 4
806-
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
807-
; CHECK: if.then:
808-
; CHECK-NEXT: store i32 [[X:%.*]], ptr [[Y]], align 4
809-
; CHECK-NEXT: br label [[IF_END:%.*]]
810-
; CHECK: if.else:
811-
; CHECK-NEXT: store i32 [[X]], ptr [[Z]], align 4
812-
; CHECK-NEXT: br label [[IF_END]]
813-
; CHECK: if.end:
806+
; CHECK-NEXT: [[Y_Z:%.*]] = select i1 [[FLAG:%.*]], ptr [[Y]], ptr [[Z]]
807+
; CHECK-NEXT: store i32 [[X:%.*]], ptr [[Y_Z]], align 4
814808
; CHECK-NEXT: ret i32 1
815809
;
816810
entry:
@@ -836,17 +830,14 @@ define i32 @test_pr30188a(i1 zeroext %flag, i32 %x) {
836830
; CHECK-NEXT: entry:
837831
; CHECK-NEXT: [[Y:%.*]] = alloca i32, align 4
838832
; CHECK-NEXT: [[Z:%.*]] = alloca i32, align 4
839-
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
833+
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
840834
; CHECK: if.then:
841835
; CHECK-NEXT: call void @g()
842-
; CHECK-NEXT: [[ONE:%.*]] = load i32, ptr [[Y]], align 4
843-
; CHECK-NEXT: br label [[IF_END:%.*]]
844-
; CHECK: if.else:
845-
; CHECK-NEXT: [[THREE:%.*]] = load i32, ptr [[Z]], align 4
846836
; CHECK-NEXT: br label [[IF_END]]
847837
; CHECK: if.end:
848-
; CHECK-NEXT: [[THREE_SINK:%.*]] = phi i32 [ [[THREE]], [[IF_ELSE]] ], [ [[ONE]], [[IF_THEN]] ]
849-
; CHECK-NEXT: [[FOUR:%.*]] = add i32 [[THREE_SINK]], 2
838+
; CHECK-NEXT: [[Z_SINK:%.*]] = phi ptr [ [[Y]], [[IF_THEN]] ], [ [[Z]], [[ENTRY:%.*]] ]
839+
; CHECK-NEXT: [[THREE:%.*]] = load i32, ptr [[Z_SINK]], align 4
840+
; CHECK-NEXT: [[FOUR:%.*]] = add i32 [[THREE]], 2
850841
; CHECK-NEXT: store i32 [[FOUR]], ptr [[Y]], align 4
851842
; CHECK-NEXT: ret i32 1
852843
;

0 commit comments

Comments
 (0)