Skip to content

Commit 7318fe6

Browse files
committed
[OpenMP][FIX] Ensure device reduction geps work for multi-var reductions
If we have more than one reduction variable we need to be consistent wrt. indexing. In 3de645e we broke this as the buffer type was reduced to a singleton but the index computation was not adjusted to account for that offset. This fixes it by interleaving the reduction variables properly in an array-of-struct style. We can revert it back to struct-of-array in a follow-up if it turns out to be a problem. I doubt it since half the accesses should benefit from the locality this layout offers and only the other half were consecutive before.
1 parent bc81f8c commit 7318fe6

File tree

5 files changed

+237
-221
lines changed

5 files changed

+237
-221
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,11 @@ static RecordDecl *buildRecordForGlobalizedVars(
153153
Field->addAttr(*I);
154154
}
155155
} else {
156-
llvm::APInt ArraySize(32, BufSize);
157-
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
158-
ArraySizeModifier::Normal, 0);
156+
if (BufSize > 1) {
157+
llvm::APInt ArraySize(32, BufSize);
158+
Type = C.getConstantArrayType(Type, ArraySize, nullptr,
159+
ArraySizeModifier::Normal, 0);
160+
}
159161
Field = FieldDecl::Create(
160162
C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
161163
C.getTrivialTypeSourceInfo(Type, SourceLocation()),
@@ -2205,8 +2207,7 @@ static llvm::Value *emitListToGlobalCopyFunction(
22052207
llvm::Value *BufferArrPtr = Bld.CreatePointerBitCastOrAddrSpaceCast(
22062208
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
22072209
LLVMReductionsBufferTy->getPointerTo());
2208-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2209-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2210+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
22102211
/*Volatile=*/false, C.IntTy,
22112212
Loc)};
22122213
unsigned Idx = 0;
@@ -2224,12 +2225,12 @@ static llvm::Value *emitListToGlobalCopyFunction(
22242225
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
22252226
// Global = Buffer.VD[Idx];
22262227
const FieldDecl *FD = VarFieldMap.lookup(VD);
2228+
llvm::Value *BufferPtr =
2229+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
22272230
LValue GlobLVal = CGF.EmitLValueForField(
2228-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2231+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
22292232
Address GlobAddr = GlobLVal.getAddress(CGF);
2230-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
2231-
GlobAddr.getPointer(), Idxs);
2232-
GlobLVal.setAddress(Address(BufferPtr,
2233+
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
22332234
CGF.ConvertTypeForMem(Private->getType()),
22342235
GlobAddr.getAlignment()));
22352236
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2316,8 +2317,7 @@ static llvm::Value *emitListToGlobalReduceFunction(
23162317
Address ReductionList =
23172318
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
23182319
auto IPriv = Privates.begin();
2319-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2320-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2320+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
23212321
/*Volatile=*/false, C.IntTy,
23222322
Loc)};
23232323
unsigned Idx = 0;
@@ -2326,12 +2326,13 @@ static llvm::Value *emitListToGlobalReduceFunction(
23262326
// Global = Buffer.VD[Idx];
23272327
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
23282328
const FieldDecl *FD = VarFieldMap.lookup(VD);
2329+
llvm::Value *BufferPtr =
2330+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
23292331
LValue GlobLVal = CGF.EmitLValueForField(
2330-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2332+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
23312333
Address GlobAddr = GlobLVal.getAddress(CGF);
2332-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
2333-
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
2334-
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
2334+
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
2335+
C.VoidPtrTy);
23352336
if ((*IPriv)->getType()->isVariablyModifiedType()) {
23362337
// Store array size.
23372338
++Idx;
@@ -2413,8 +2414,7 @@ static llvm::Value *emitGlobalToListCopyFunction(
24132414
CGF.EmitLoadOfScalar(AddrBufferArg, /*Volatile=*/false, C.VoidPtrTy, Loc),
24142415
LLVMReductionsBufferTy->getPointerTo());
24152416

2416-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2417-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2417+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
24182418
/*Volatile=*/false, C.IntTy,
24192419
Loc)};
24202420
unsigned Idx = 0;
@@ -2432,12 +2432,12 @@ static llvm::Value *emitGlobalToListCopyFunction(
24322432
const ValueDecl *VD = cast<DeclRefExpr>(Private)->getDecl();
24332433
// Global = Buffer.VD[Idx];
24342434
const FieldDecl *FD = VarFieldMap.lookup(VD);
2435+
llvm::Value *BufferPtr =
2436+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
24352437
LValue GlobLVal = CGF.EmitLValueForField(
2436-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2438+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
24372439
Address GlobAddr = GlobLVal.getAddress(CGF);
2438-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(GlobAddr.getElementType(),
2439-
GlobAddr.getPointer(), Idxs);
2440-
GlobLVal.setAddress(Address(BufferPtr,
2440+
GlobLVal.setAddress(Address(GlobAddr.getPointer(),
24412441
CGF.ConvertTypeForMem(Private->getType()),
24422442
GlobAddr.getAlignment()));
24432443
switch (CGF.getEvaluationKind(Private->getType())) {
@@ -2524,8 +2524,7 @@ static llvm::Value *emitGlobalToListReduceFunction(
25242524
Address ReductionList =
25252525
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
25262526
auto IPriv = Privates.begin();
2527-
llvm::Value *Idxs[] = {llvm::ConstantInt::getNullValue(CGF.Int32Ty),
2528-
CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
2527+
llvm::Value *Idxs[] = {CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(&IdxArg),
25292528
/*Volatile=*/false, C.IntTy,
25302529
Loc)};
25312530
unsigned Idx = 0;
@@ -2534,12 +2533,13 @@ static llvm::Value *emitGlobalToListReduceFunction(
25342533
// Global = Buffer.VD[Idx];
25352534
const ValueDecl *VD = cast<DeclRefExpr>(*IPriv)->getDecl();
25362535
const FieldDecl *FD = VarFieldMap.lookup(VD);
2536+
llvm::Value *BufferPtr =
2537+
Bld.CreateInBoundsGEP(LLVMReductionsBufferTy, BufferArrPtr, Idxs);
25372538
LValue GlobLVal = CGF.EmitLValueForField(
2538-
CGF.MakeNaturalAlignAddrLValue(BufferArrPtr, StaticTy), FD);
2539+
CGF.MakeNaturalAlignAddrLValue(BufferPtr, StaticTy), FD);
25392540
Address GlobAddr = GlobLVal.getAddress(CGF);
2540-
llvm::Value *BufferPtr = Bld.CreateInBoundsGEP(
2541-
GlobAddr.getElementType(), GlobAddr.getPointer(), Idxs);
2542-
CGF.EmitStoreOfScalar(BufferPtr, Elem, /*Volatile=*/false, C.VoidPtrTy);
2541+
CGF.EmitStoreOfScalar(GlobAddr.getPointer(), Elem, /*Volatile=*/false,
2542+
C.VoidPtrTy);
25432543
if ((*IPriv)->getType()->isVariablyModifiedType()) {
25442544
// Store array size.
25452545
++Idx;

0 commit comments

Comments
 (0)