Skip to content

Commit b9e1cad

Browse files
AlexVlxronlieb
authored andcommitted
[clang][CodeGen] Additional fixes for llvm#114062 (llvm#128166)
This addresses two issues introduced by moving indirect args into an explicit AS (please see <llvm#114062 (comment)> and <llvm#114062 (comment)>): 1. Unconditionally stripping casts from a pre-allocated return slot was incorrect / insufficient (this is illustrated by the `amdgcn_sret_ctor.cpp` test); 2. Putting compiler manufactured sret args in a non default AS can lead to a C-cast (surprisingly) requiring an AS cast (this is illustrated by the `sret_cast_with_nonzero_alloca_as.cpp test). The way we handle (2), by subverting CK_BitCast emission iff a sret arg is involved, is quite naff, but I couldn't think of any other way to use a non default indirect AS and make this case work (hopefully this is a failure of imagination on my part).
1 parent 8040cad commit b9e1cad

File tree

6 files changed

+106
-14
lines changed

6 files changed

+106
-14
lines changed

clang/lib/CodeGen/CGCall.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5178,6 +5178,21 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
51785178
}
51795179
}
51805180
if (IRFunctionArgs.hasSRetArg()) {
5181+
// A mismatch between the allocated return value's AS and the target's
5182+
// chosen IndirectAS can happen e.g. when passing the this pointer through
5183+
// a chain involving stores to / loads from the DefaultAS; we address this
5184+
// here, symmetrically with the handling we have for normal pointer args.
5185+
if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
5186+
llvm::Value *V = SRetPtr.getBasePointer();
5187+
LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace());
5188+
LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace());
5189+
llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
5190+
RetAI.getIndirectAddrSpace());
5191+
5192+
SRetPtr = SRetPtr.withPointer(
5193+
getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true),
5194+
SRetPtr.isKnownNonNull());
5195+
}
51815196
IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
51825197
getAsNaturalPointerTo(SRetPtr, RetTy);
51835198
} else if (RetAI.isInAlloca()) {
@@ -5409,9 +5424,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
54095424
V->getType()->isIntegerTy())
54105425
V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
54115426

5412-
// The only plausible mismatch here would be for pointer address spaces,
5413-
// which can happen e.g. when passing a sret arg that is in the AllocaAS
5414-
// to a function that takes a pointer to and argument in the DefaultAS.
5427+
// The only plausible mismatch here would be for pointer address spaces.
54155428
// We assume that the target has a reasonable mapping for the DefaultAS
54165429
// (it can be casted to from incoming specific ASes), and insert an AS
54175430
// cast to address the mismatch.

clang/lib/CodeGen/CGExprAgg.cpp

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -302,15 +302,8 @@ void AggExprEmitter::withReturnValueSlot(
302302
llvm::Value *LifetimeSizePtr = nullptr;
303303
llvm::IntrinsicInst *LifetimeStartInst = nullptr;
304304
if (!UseTemp) {
305-
// It is possible for the existing slot we are using directly to have been
306-
// allocated in the correct AS for an indirect return, and then cast to
307-
// the default AS (this is the behaviour of CreateMemTemp), however we know
308-
// that the return address is expected to point to the uncasted AS, hence we
309-
// strip possible pointer casts here.
310-
if (Dest.getAddress().isValid())
311-
RetAddr = Dest.getAddress().withPointer(
312-
Dest.getAddress().getBasePointer()->stripPointerCasts(),
313-
Dest.getAddress().isKnownNonNull());
305+
RetAddr = Dest.getAddress();
306+
RawAddress RetAllocaAddr = RawAddress::invalid();
314307
} else {
315308
RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
316309
llvm::TypeSize Size =

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "clang/Basic/CodeGenOptions.h"
3131
#include "clang/Basic/TargetInfo.h"
3232
#include "llvm/ADT/APFixedPoint.h"
33+
#include "llvm/IR/Argument.h"
3334
#include "llvm/IR/CFG.h"
3435
#include "llvm/IR/Constants.h"
3536
#include "llvm/IR/DataLayout.h"
@@ -2352,6 +2353,21 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
23522353
Value *Src = Visit(const_cast<Expr*>(E));
23532354
llvm::Type *SrcTy = Src->getType();
23542355
llvm::Type *DstTy = ConvertType(DestTy);
2356+
2357+
// FIXME: this is a gross but seemingly necessary workaround for an issue
2358+
// manifesting when a target uses a non-default AS for indirect sret args,
2359+
// but the source HLL is generic, wherein a valid C-cast or reinterpret_cast
2360+
// on the address of a local struct that gets returned by value yields an
2361+
// invalid bitcast from the a pointer to the IndirectAS to a pointer to the
2362+
// DefaultAS. We can only do this subversive thing because sret args are
2363+
// manufactured and them residing in the IndirectAS is a target specific
2364+
// detail, and doing an AS cast here still retains the semantics the user
2365+
// expects. It is desirable to remove this iff a better solution is found.
2366+
if (auto A = dyn_cast<llvm::Argument>(Src); A && A->hasStructRetAttr())
2367+
return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
2368+
CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(),
2369+
DstTy);
2370+
23552371
assert(
23562372
(!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
23572373
SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&

clang/test/CodeGen/partial-reinitialization2.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ void test5(void)
9191
// CHECK-LABEL: test6
9292
void test6(void)
9393
{
94-
// CHECK: [[VAR:%[a-z0-9]+]] = alloca
95-
// CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]])
94+
// CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0
95+
// CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[LP]])
9696

9797
// CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
9898
// CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
3+
4+
struct X { int z[17]; };
5+
6+
// CHECK-LABEL: define dso_local void @_Z3foocc(
7+
// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_X:%.*]]) align 4 [[AGG_RESULT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: [[ENTRY:.*:]]
9+
// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
10+
// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
11+
// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
12+
// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
13+
// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
14+
// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
15+
// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
16+
// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
17+
// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
18+
// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
19+
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
20+
// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
21+
// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
22+
// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
23+
// CHECK-NEXT: ret void
24+
//
25+
X foo(char x, char y) {
26+
X r;
27+
28+
*(((char*)&r) + 1) = x;
29+
*(reinterpret_cast<char*>(&r) + 2) = y;
30+
31+
return r;
32+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s
3+
4+
#pragma omp begin declare target
5+
struct S {
6+
~S();
7+
};
8+
S s();
9+
struct E {
10+
S foo;
11+
E() noexcept;
12+
};
13+
E::E() noexcept : foo(s()) {}
14+
#pragma omp end declare target
15+
// CHECK-LABEL: define hidden void @_ZN1EC2Ev(
16+
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
17+
// CHECK-NEXT: [[ENTRY:.*:]]
18+
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
19+
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
20+
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
21+
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
22+
// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5)
23+
// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
24+
// CHECK-NEXT: ret void
25+
//
26+
// CHECK-LABEL: declare void @_Z1sv(
27+
// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1) #[[ATTR1:[0-9]+]]
28+
//
29+
// CHECK-LABEL: define hidden void @_ZN1EC1Ev(
30+
// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 {
31+
// CHECK-NEXT: [[ENTRY:.*:]]
32+
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
33+
// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
34+
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
35+
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
36+
// CHECK-NEXT: call void @_ZN1EC2Ev(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]]) #[[ATTR3:[0-9]+]]
37+
// CHECK-NEXT: ret void
38+
//

0 commit comments

Comments
 (0)