Skip to content

Commit c8f70d7

Browse files
authored
[clang][CodeGen] Additional fixes for #114062 (#128166)
This addresses two issues introduced by moving indirect args into an explicit AS (please see <#114062 (comment)> and <#114062 (comment)>): 1. Unconditionally stripping casts from a pre-allocated return slot was incorrect / insufficient (this is illustrated by the `amdgcn_sret_ctor.cpp` test); 2. Putting compiler manufactured sret args in a non default AS can lead to a C-cast (surprisingly) requiring an AS cast (this is illustrated by the `sret_cast_with_nonzero_alloca_as.cpp test). The way we handle (2), by subverting CK_BitCast emission iff a sret arg is involved, is quite naff, but I couldn't think of any other way to use a non default indirect AS and make this case work (hopefully this is a failure of imagination on my part).
1 parent a316539 commit c8f70d7

File tree

6 files changed

+106
-14
lines changed

6 files changed

+106
-14
lines changed

clang/lib/CodeGen/CGCall.cpp

+16-3
Original file line numberDiff line numberDiff line change
@@ -5163,6 +5163,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
51635163
}
51645164
}
51655165
if (IRFunctionArgs.hasSRetArg()) {
5166+
// A mismatch between the allocated return value's AS and the target's
5167+
// chosen IndirectAS can happen e.g. when passing the this pointer through
5168+
// a chain involving stores to / loads from the DefaultAS; we address this
5169+
// here, symmetrically with the handling we have for normal pointer args.
5170+
if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
5171+
llvm::Value *V = SRetPtr.getBasePointer();
5172+
LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace());
5173+
LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace());
5174+
llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
5175+
RetAI.getIndirectAddrSpace());
5176+
5177+
SRetPtr = SRetPtr.withPointer(
5178+
getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true),
5179+
SRetPtr.isKnownNonNull());
5180+
}
51665181
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
51675182
getAsNaturalPointerTo(SRetPtr, RetTy);
51685183
} else if (RetAI.isInAlloca()) {
@@ -5394,9 +5409,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
53945409
V->getType()->isIntegerTy())
53955410
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
53965411

5397-
// The only plausible mismatch here would be for pointer address spaces,
5398-
// which can happen e.g. when passing a sret arg that is in the AllocaAS
5399-
// to a function that takes a pointer to and argument in the DefaultAS.
5412+
// The only plausible mismatch here would be for pointer address spaces.
54005413
// We assume that the target has a reasonable mapping for the DefaultAS
54015414
// (it can be casted to from incoming specific ASes), and insert an AS
54025415
// cast to address the mismatch.

clang/lib/CodeGen/CGExprAgg.cpp

+2-9
Original file line numberDiff line numberDiff line change
@@ -302,15 +302,8 @@ void AggExprEmitter::withReturnValueSlot(
302302
llvm::Value *LifetimeSizePtr = nullptr;
303303
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
304304
if (!UseTemp) {
305-
// It is possible for the existing slot we are using directly to have been
306-
// allocated in the correct AS for an indirect return, and then cast to
307-
// the default AS (this is the behaviour of CreateMemTemp), however we know
308-
// that the return address is expected to point to the uncasted AS, hence we
309-
// strip possible pointer casts here.
310-
if (Dest.getAddress().isValid())
311-
RetAddr = Dest.getAddress().withPointer(
312-
Dest.getAddress().getBasePointer()->stripPointerCasts(),
313-
Dest.getAddress().isKnownNonNull());
305+
RetAddr = Dest.getAddress();
306+
RawAddress RetAllocaAddr = RawAddress::invalid();
314307
} else {
315308
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
316309
llvm::TypeSize Size =

clang/lib/CodeGen/CGExprScalar.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "clang/Basic/CodeGenOptions.h"
3131
#include "clang/Basic/TargetInfo.h"
3232
#include "llvm/ADT/APFixedPoint.h"
33+
#include "llvm/IR/Argument.h"
3334
#include "llvm/IR/CFG.h"
3435
#include "llvm/IR/Constants.h"
3536
#include "llvm/IR/DataLayout.h"
@@ -2352,6 +2353,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
23522353
Value *Src = Visit(const_cast<Expr*>(E));
23532354
llvm::Type *SrcTy = Src->getType();
23542355
llvm::Type *DstTy = ConvertType(DestTy);
2356+
2357+
// FIXME: this is a gross but seemingly necessary workaround for an issue
2358+
// manifesting when a target uses a non-default AS for indirect sret args,
2359+
// but the source HLL is generic, wherein a valid C-cast or reinterpret_cast
2360+
// on the address of a local struct that gets returned by value yields an
2361+
// invalid bitcast from the a pointer to the IndirectAS to a pointer to the
2362+
// DefaultAS. We can only do this subversive thing because sret args are
2363+
// manufactured and them residing in the IndirectAS is a target specific
2364+
// detail, and doing an AS cast here still retains the semantics the user
2365+
// expects. It is desirable to remove this iff a better solution is found.
2366+
if (auto A = dyn_cast<llvm::Argument>(Src); A && A->hasStructRetAttr())
2367+
return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
2368+
CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(),
2369+
DstTy);
2370+
23552371
assert(
23562372
(!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
23572373
SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&

clang/test/CodeGen/partial-reinitialization2.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ void test5(void)
9191
// CHECK-LABEL: test6
9292
void test6(void)
9393
{
94-
// CHECK: [[VAR:%[a-z0-9]+]] = alloca
95-
// CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]])
94+
// CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0
95+
// CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[LP]])
9696

9797
// CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
9898
// CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
3+
4+
struct X { int z[17]; };
5+
6+
// CHECK-LABEL: define dso_local void @_Z3foocc(
7+
// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_X:%.*]]) align 4 [[AGG_RESULT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: [[ENTRY:.*:]]
9+
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
10+
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
11+
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
12+
// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
13+
// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
14+
// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
15+
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
16+
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
17+
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
18+
// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
19+
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
20+
// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
21+
// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
22+
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
23+
// CHECK-NEXT: ret void
24+
//
25+
X foo(char x, char y) {
26+
X r;
27+
28+
*(((char*)&r) + 1) = x;
29+
*(reinterpret_cast<char*>(&r) + 2) = y;
30+
31+
return r;
32+
}
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s
3+
4+
#pragma omp begin declare target
5+
struct S {
6+
~S();
7+
};
8+
S s();
9+
struct E {
10+
S foo;
11+
E() noexcept;
12+
};
13+
E::E() noexcept : foo(s()) {}
14+
#pragma omp end declare target
15+
// CHECK-LABEL: define hidden void @_ZN1EC2Ev(
16+
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
17+
// CHECK-NEXT: [[ENTRY:.*:]]
18+
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
19+
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
20+
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
21+
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
22+
// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
23+
// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
24+
// CHECK-NEXT: ret void
25+
//
26+
// CHECK-LABEL: declare void @_Z1sv(
27+
// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1) #[[ATTR1:[0-9]+]]
28+
//
29+
// CHECK-LABEL: define hidden void @_ZN1EC1Ev(
30+
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
31+
// CHECK-NEXT: [[ENTRY:.*:]]
32+
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
33+
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
34+
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
35+
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
36+
// CHECK-NEXT: call void @_ZN1EC2Ev(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]]) #[[ATTR3:[0-9]+]]
37+
// CHECK-NEXT: ret void
38+
//

0 commit comments

Comments
 (0)