Skip to content

Commit 58ed584

Browse files
SC llvm team authored and committed
Merged main:4549a8d251cfa91cc6230139595f0b7efdf199d9 into amd-gfx:757aa7b5f1a5
Local branch amd-gfx 757aa7b Merged main:533e6bbd0d344a710c491a9eb0ce0ba0852b08cb into amd-gfx:df9ba6cae58e Remote branch main 4549a8d [InstCombine] Add additional tests for select of phi transform (NFC)
2 parents 757aa7b + 4549a8d commit 58ed584

File tree

9 files changed

+174
-44
lines changed

9 files changed

+174
-44
lines changed

compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2312,11 +2312,11 @@ static const char *RegNumToRegName(int reg) {
23122312
return NULL;
23132313
}
23142314

2315-
# if SANITIZER_LINUX && SANITIZER_GLIBC && \
2315+
# if ((SANITIZER_LINUX && SANITIZER_GLIBC) || SANITIZER_NETBSD) && \
23162316
(defined(__arm__) || defined(__aarch64__))
23172317
static uptr GetArmRegister(ucontext_t *ctx, int RegNum) {
23182318
switch (RegNum) {
2319-
# if defined(__arm__)
2319+
# if defined(__arm__) && !SANITIZER_NETBSD
23202320
# ifdef MAKE_CASE
23212321
# undef MAKE_CASE
23222322
# endif
@@ -2345,10 +2345,15 @@ static uptr GetArmRegister(ucontext_t *ctx, int RegNum) {
23452345
case REG_R15:
23462346
return ctx->uc_mcontext.arm_pc;
23472347
# elif defined(__aarch64__)
2348+
# if SANITIZER_LINUX
23482349
case 0 ... 30:
23492350
return ctx->uc_mcontext.regs[RegNum];
23502351
case 31:
23512352
return ctx->uc_mcontext.sp;
2353+
# elif SANITIZER_NETBSD
2354+
case 0 ... 31:
2355+
return ctx->uc_mcontext.__gregs[RegNum];
2356+
# endif
23522357
# endif
23532358
default:
23542359
return 0;
@@ -2456,7 +2461,7 @@ void SignalContext::DumpAllRegisters(void *context) {
24562461
DumpSingleReg(ucontext, REG_R14);
24572462
DumpSingleReg(ucontext, REG_R15);
24582463
Printf("\n");
2459-
# elif defined(__aarch64__) && !SANITIZER_NETBSD
2464+
# elif defined(__aarch64__)
24602465
Report("Register values:\n");
24612466
for (int i = 0; i <= 31; ++i) {
24622467
DumpSingleReg(ucontext, i);

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 509586
19+
#define LLVM_MAIN_REVISION 509590
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,27 +1937,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19371937
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
19381938
<< ": " << *Dist << "\n");
19391939

1940-
// Check if we can prove that Sink only accesses memory after Src's end or
1941-
// vice versa. At the moment this is limited to cases where either source or
1942-
// sink are loop invariant to avoid compile-time increases. This is not
1943-
// required for correctness.
1944-
if (SE.isLoopInvariant(Src, InnermostLoop) ||
1945-
SE.isLoopInvariant(Sink, InnermostLoop)) {
1946-
const auto &[SrcStart, SrcEnd] =
1947-
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
1948-
const auto &[SinkStart, SinkEnd] =
1949-
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
1950-
if (!isa<SCEVCouldNotCompute>(SrcStart) &&
1951-
!isa<SCEVCouldNotCompute>(SrcEnd) &&
1952-
!isa<SCEVCouldNotCompute>(SinkStart) &&
1953-
!isa<SCEVCouldNotCompute>(SinkEnd)) {
1954-
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1955-
return MemoryDepChecker::Dependence::NoDep;
1956-
if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1957-
return MemoryDepChecker::Dependence::NoDep;
1958-
}
1959-
}
1960-
19611940
// Need accesses with constant strides and the same direction for further
19621941
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
19631942
// similar code or pointer arithmetic that could wrap in the address space.
@@ -2003,12 +1982,45 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20031982
const MemAccessInfo &B, unsigned BIdx) {
20041983
assert(AIdx < BIdx && "Must pass arguments in program order");
20051984

1985+
// Check if we can prove that Sink only accesses memory after Src's end or
1986+
// vice versa. The helper is used to perform the checks only on the exit paths
1987+
// where it helps to improve the analysis result.
1988+
auto CheckCompletelyBeforeOrAfter = [&]() {
1989+
auto *APtr = A.getPointer();
1990+
auto *BPtr = B.getPointer();
1991+
1992+
Type *ATy = getLoadStoreType(InstMap[AIdx]);
1993+
Type *BTy = getLoadStoreType(InstMap[BIdx]);
1994+
1995+
const SCEV *Src = PSE.getSCEV(APtr);
1996+
const SCEV *Sink = PSE.getSCEV(BPtr);
1997+
1998+
const auto &[SrcStart, SrcEnd] =
1999+
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
2000+
if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
2001+
return false;
2002+
2003+
const auto &[SinkStart, SinkEnd] =
2004+
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
2005+
if (isa<SCEVCouldNotCompute>(SinkStart) ||
2006+
isa<SCEVCouldNotCompute>(SinkEnd))
2007+
return false;
2008+
2009+
auto &SE = *PSE.getSE();
2010+
return SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart) ||
2011+
SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2012+
};
2013+
20062014
// Get the dependence distance, stride, type size and what access writes for
20072015
// the dependence between A and B.
20082016
auto Res =
20092017
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2010-
if (std::holds_alternative<Dependence::DepType>(Res))
2018+
if (std::holds_alternative<Dependence::DepType>(Res)) {
2019+
if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2020+
CheckCompletelyBeforeOrAfter())
2021+
return Dependence::NoDep;
20112022
return std::get<Dependence::DepType>(Res);
2023+
}
20122024

20132025
auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20142026
std::get<DepDistanceStrideAndSizeInfo>(Res);
@@ -2017,6 +2029,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20172029
std::optional<uint64_t> CommonStride =
20182030
StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
20192031
if (isa<SCEVCouldNotCompute>(Dist)) {
2032+
if (CheckCompletelyBeforeOrAfter())
2033+
return Dependence::NoDep;
2034+
20202035
// TODO: Relax requirement that there is a common stride to retry with
20212036
// non-constant distance dependencies.
20222037
FoundNonConstantDistanceDependence |= CommonStride.has_value();
@@ -2068,6 +2083,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20682083
// Write to the same location with the same size.
20692084
return Dependence::Forward;
20702085
}
2086+
assert(!CheckCompletelyBeforeOrAfter() &&
2087+
"unexpectedly proved no dependence");
20712088
LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
20722089
"different type sizes\n");
20732090
return Dependence::Unknown;
@@ -2089,6 +2106,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20892106
// did not set it when strides were different but there is no inherent
20902107
// reason to.
20912108
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2109+
if (CheckCompletelyBeforeOrAfter())
2110+
return Dependence::NoDep;
20922111
return Dependence::Unknown;
20932112
}
20942113
if (!HasSameSize ||
@@ -2108,6 +2127,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21082127
// Below we only handle strictly positive distances.
21092128
if (MinDistance <= 0) {
21102129
FoundNonConstantDistanceDependence |= CommonStride.has_value();
2130+
if (CheckCompletelyBeforeOrAfter())
2131+
return Dependence::NoDep;
2132+
21112133
return Dependence::Unknown;
21122134
}
21132135

@@ -2124,13 +2146,18 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21242146
}
21252147

21262148
if (!HasSameSize) {
2149+
if (CheckCompletelyBeforeOrAfter())
2150+
return Dependence::NoDep;
21272151
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
21282152
"different type sizes\n");
21292153
return Dependence::Unknown;
21302154
}
21312155

2132-
if (!CommonStride)
2156+
if (!CommonStride) {
2157+
if (CheckCompletelyBeforeOrAfter())
2158+
return Dependence::NoDep;
21332159
return Dependence::Unknown;
2160+
}
21342161

21352162
// Bail out early if passed-in parameters make vectorization not feasible.
21362163
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -2178,6 +2205,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21782205
// dependence distance and the distance may be larger at runtime (and safe
21792206
// for vectorization). Classify it as Unknown, so we re-try with runtime
21802207
// checks.
2208+
//
2209+
if (CheckCompletelyBeforeOrAfter())
2210+
return Dependence::NoDep;
2211+
21812212
return Dependence::Unknown;
21822213
}
21832214
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
@@ -2190,6 +2221,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21902221
if (MinDistanceNeeded > MinDepDistBytes) {
21912222
LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
21922223
<< MinDistanceNeeded << " size in bytes\n");
2224+
assert(!CheckCompletelyBeforeOrAfter() &&
2225+
"unexpectedly proved no dependence");
21932226
return Dependence::Backward;
21942227
}
21952228

@@ -2237,6 +2270,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22372270
// For non-constant distances, we checked the lower bound of the dependence
22382271
// distance and the distance may be larger at runtime (and safe for
22392272
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
2273+
assert(!CheckCompletelyBeforeOrAfter() &&
2274+
"unexpectedly proved no dependence");
22402275
return Dependence::Unknown;
22412276
}
22422277

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,12 @@ static cl::opt<bool> EnableScalarIRPasses(
338338
cl::init(true),
339339
cl::Hidden);
340340

341+
static cl::opt<bool>
342+
EnableSwLowerLDS("amdgpu-enable-sw-lower-lds",
343+
cl::desc("Enable lowering of lds to global memory pass "
344+
"and asan instrument resulting IR."),
345+
cl::init(true), cl::Hidden);
346+
341347
static cl::opt<bool, true> EnableLowerModuleLDS(
342348
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
343349
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
@@ -761,6 +767,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
761767
// We want to support the -lto-partitions=N option as "best effort".
762768
// For that, we need to lower LDS earlier in the pipeline before the
763769
// module is partitioned for codegen.
770+
if (EnableSwLowerLDS)
771+
PM.addPass(AMDGPUSwLowerLDSPass(*this));
764772
if (EnableLowerModuleLDS)
765773
PM.addPass(AMDGPULowerModuleLDSPass(*this));
766774

@@ -1071,6 +1079,10 @@ void AMDGPUPassConfig::addIRPasses() {
10711079
// Replace OpenCL enqueued block function pointers with global variables.
10721080
addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
10731081

1082+
// Lower LDS accesses to global memory so the resulting IR can be
// instrumented by the address sanitizer; controlled by
// -amdgpu-enable-sw-lower-lds (enabled by default).
1083+
if (EnableSwLowerLDS)
1084+
addPass(createAMDGPUSwLowerLDSLegacyPass(&TM));
1085+
10741086
// Runs before PromoteAlloca so the latter can account for function uses
10751087
if (EnableLowerModuleLDS) {
10761088
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,16 +130,8 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
130130
; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
131131
; CHECK-NEXT: loop:
132132
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
133-
; CHECK-NEXT: Unknown data dependence.
133+
; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
134134
; CHECK-NEXT: Dependences:
135-
; CHECK-NEXT: Unknown:
136-
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
137-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
138-
; CHECK-EMPTY:
139-
; CHECK-NEXT: Unknown:
140-
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
141-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
142-
; CHECK-EMPTY:
143135
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
144136
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
145137
; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,8 @@ exit:
4545
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
4646
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
4747
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
49-
; CHECK-NEXT: Unknown data dependence.
48+
; CHECK-NEXT: Memory dependences are safe
5049
; CHECK-NEXT: Dependences:
51-
; CHECK-NEXT: Unknown:
52-
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
53-
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
54-
; CHECK-EMPTY:
5550
; CHECK-NEXT: Run-time memory checks:
5651
; CHECK-NEXT: Grouped accesses:
5752
; CHECK-EMPTY:

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl)
4040
; GCN-O0-NEXT: Function Alias Analysis Results
4141
; GCN-O0-NEXT: Lower OpenCL enqueued blocks
42+
; GCN-O0-NEXT: AMDGPU Software lowering of LDS
4243
; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions
4344
; GCN-O0-NEXT: FunctionPass Manager
4445
; GCN-O0-NEXT: Expand Atomic instructions
@@ -190,6 +191,7 @@
190191
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
191192
; GCN-O1-NEXT: Function Alias Analysis Results
192193
; GCN-O1-NEXT: Lower OpenCL enqueued blocks
194+
; GCN-O1-NEXT: AMDGPU Software lowering of LDS
193195
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
194196
; GCN-O1-NEXT: FunctionPass Manager
195197
; GCN-O1-NEXT: Infer address spaces
@@ -471,6 +473,7 @@
471473
; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
472474
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
473475
; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks
476+
; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS
474477
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
475478
; GCN-O1-OPTS-NEXT: FunctionPass Manager
476479
; GCN-O1-OPTS-NEXT: Infer address spaces
@@ -783,6 +786,7 @@
783786
; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
784787
; GCN-O2-NEXT: Function Alias Analysis Results
785788
; GCN-O2-NEXT: Lower OpenCL enqueued blocks
789+
; GCN-O2-NEXT: AMDGPU Software lowering of LDS
786790
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
787791
; GCN-O2-NEXT: FunctionPass Manager
788792
; GCN-O2-NEXT: Infer address spaces
@@ -1099,6 +1103,7 @@
10991103
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
11001104
; GCN-O3-NEXT: Function Alias Analysis Results
11011105
; GCN-O3-NEXT: Lower OpenCL enqueued blocks
1106+
; GCN-O3-NEXT: AMDGPU Software lowering of LDS
11021107
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
11031108
; GCN-O3-NEXT: FunctionPass Manager
11041109
; GCN-O3-NEXT: Infer address spaces

0 commit comments

Comments
 (0)