-
Notifications
You must be signed in to change notification settings - Fork 13.5k
release/20.x: Revert "Do not use private
as the default AS for when generic
is available (#112442)"
#127771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/20.x: Revert "Do not use private
as the default AS for when generic
is available (#112442)"
#127771
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: This reverts commit 6e0b003. This breaks the rocm-device-libs build, so it should not ship in the release. Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff 21 Files Affected:
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0d308cb6af969..9ea366af56a52 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
- // address space in OpenCL, which needs to be cleaned up, then the references
- // to OpenCL can be removed from the following line.
- setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
+ // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+ // can be removed from the following line.
+ setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
!isAMDGCN(getTriple()));
}
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7..f38f86c792f69 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
- cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
- Builder);
+ cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
}
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ec9d59bfed5c..5237533364294 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
- // The EmitLifetime* pair expect a naked Alloca as their last argument,
- // however for cases where the default AS is not the Alloca AS, Tmp is
- // actually the Alloca ascasted to the default AS, hence the
- // stripPointerCasts()
- llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
+ CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
- // Return the Alloca itself rather than a potential ascast as this is only
- // used by the paired EmitLifetimeEnd.
- return std::tie(ElemPtr, TmpSize, Alloca);
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};
// Could have events and/or varargs.
diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
index d83ae05b0aea2..20cbb511a1758 100644
--- a/clang/test/CodeGen/scoped-fence-ops.c
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
@@ -30,34 +30,62 @@ void fe1a() {
__scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1b(
-// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
-// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
-// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[ACQUIRE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acquire
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[RELEASE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[ACQREL]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SEQCST]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1b(
+// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[ACQUIRE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[RELEASE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[ACQREL]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SEQCST]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1b(
+// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[ACQUIRE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[RELEASE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[ACQREL]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SEQCST]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1b(
// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
@@ -119,37 +147,68 @@ void fe1b(int ord) {
__scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1c(
-// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
-// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
-// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[DEVICE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("agent") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SYSTEM_SCOPE]]:
-// AMDGCN-NEXT: fence release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WORKGROUP_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WAVEFRONT_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("wavefront") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SINGLE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("singlethread") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1c(
+// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[DEVICE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("agent") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SINGLE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1c(
+// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[DEVICE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("agent") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SINGLE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1c(
// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 57d056b0ff9d5..7377b5bcbc347 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -69,11 +69,9 @@ struct LargeStructOneMember g_s;
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
-// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
+// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// SPIR-LABEL: define dso_local spir_func void @foo(
@@ -152,22 +150,19 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
-// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
+// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
-// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
-// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
+// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
+// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @ker(
@@ -250,11 +245,10 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @foo_large(
-// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
-// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
+// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
+// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @foo_large(
@@ -325,18 +319,15 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TM...
[truncated]
|
@llvm/pr-subscribers-clang-codegen Author: Matt Arsenault (arsenm) Changes: This reverts commit 6e0b003. This breaks the rocm-device-libs build, so it should not ship in the release. Patch is 214.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127771.diff 21 Files Affected:
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 0d308cb6af969..9ea366af56a52 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
TargetInfo::adjust(Diags, Opts);
// ToDo: There are still a few places using default address space as private
- // address space in OpenCL, which needs to be cleaned up, then the references
- // to OpenCL can be removed from the following line.
- setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
+ // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
+ // can be removed from the following line.
+ setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
!isAMDGCN(getTriple()));
}
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index a7584a95c8ca7..f38f86c792f69 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
DI->setLocation(D->getLocation());
DI->EmitDeclareOfBlockLiteralArgVariable(
*BlockInfo, D->getName(), argNum,
- cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
- Builder);
+ cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
}
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7ec9d59bfed5c..5237533364294 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
/*IndexTypeQuals=*/0);
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
llvm::Value *TmpPtr = Tmp.getPointer();
- // The EmitLifetime* pair expect a naked Alloca as their last argument,
- // however for cases where the default AS is not the Alloca AS, Tmp is
- // actually the Alloca ascasted to the default AS, hence the
- // stripPointerCasts()
- llvm::Value *Alloca = TmpPtr->stripPointerCasts();
llvm::Value *TmpSize = EmitLifetimeStart(
- CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
+ CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
llvm::Value *ElemPtr;
// Each of the following arguments specifies the size of the corresponding
// argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Builder.CreateAlignedStore(
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
}
- // Return the Alloca itself rather than a potential ascast as this is only
- // used by the paired EmitLifetimeEnd.
- return std::tie(ElemPtr, TmpSize, Alloca);
+ return std::tie(ElemPtr, TmpSize, TmpPtr);
};
// Could have events and/or varargs.
diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
index d83ae05b0aea2..20cbb511a1758 100644
--- a/clang/test/CodeGen/scoped-fence-ops.c
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -1,8 +1,8 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
@@ -30,34 +30,62 @@ void fe1a() {
__scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1b(
-// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
-// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
-// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[ACQUIRE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acquire
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[RELEASE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[ACQREL]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SEQCST]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1b(
+// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[ACQUIRE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[RELEASE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[ACQREL]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SEQCST]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1b(
+// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]]
+// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[ACQUIRE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[RELEASE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[ACQREL]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SEQCST]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1b(
// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
@@ -119,37 +147,68 @@ void fe1b(int ord) {
__scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
}
-// AMDGCN-LABEL: define hidden void @fe1c(
-// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
-// AMDGCN-NEXT: [[ENTRY:.*:]]
-// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
-// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
-// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
-// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
-// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
-// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
-// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
-// AMDGCN-NEXT: ]
-// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
-// AMDGCN-NEXT: ret void
-// AMDGCN: [[DEVICE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("agent") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SYSTEM_SCOPE]]:
-// AMDGCN-NEXT: fence release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WORKGROUP_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("workgroup") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[WAVEFRONT_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("wavefront") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
-// AMDGCN: [[SINGLE_SCOPE]]:
-// AMDGCN-NEXT: fence syncscope("singlethread") release
-// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12-LABEL: define hidden void @fe1c(
+// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL12-NEXT: ]
+// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL12-NEXT: ret void
+// AMDGCN-CL12: [[DEVICE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("agent") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL12: [[SINGLE_SCOPE]]:
+// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// AMDGCN-CL20-LABEL: define hidden void @fe1c(
+// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
+// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
+// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-CL20-NEXT: ]
+// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-CL20-NEXT: ret void
+// AMDGCN-CL20: [[DEVICE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("agent") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SYSTEM_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WORKGROUP_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-CL20: [[SINGLE_SCOPE]]:
+// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release
+// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
//
// SPIRV-LABEL: define hidden spir_func void @fe1c(
// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index 57d056b0ff9d5..7377b5bcbc347 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -69,11 +69,9 @@ struct LargeStructOneMember g_s;
// AMDGCN20-NEXT: [[ENTRY:.*:]]
// AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
-// AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
-// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
-// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
+// AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
+// AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
// AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
//
// SPIR-LABEL: define dso_local spir_func void @foo(
@@ -152,22 +150,19 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
// AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
-// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
-// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
+// AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
+// AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
-// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
+// AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
// AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
// AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
// AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
-// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
+// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
// AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
-// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
-// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
+// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
+// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_kernel void @ker(
@@ -250,11 +245,10 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
// AMDGCN-NEXT: ret void
//
// AMDGCN20-LABEL: define dso_local void @foo_large(
-// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
+// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
// AMDGCN20-NEXT: [[ENTRY:.*:]]
-// AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
-// AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
+// AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
+// AMDGCN20-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
// AMDGCN20-NEXT: ret void
//
// SPIR-LABEL: define dso_local spir_func void @foo_large(
@@ -325,18 +319,15 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
// AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
// AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
-// AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
-// AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
-// AMDGCN20-NEXT: [[TM...
[truncated]
|
…default AS for when `generic` is available (llvm#112442)" This reverts commit 6e0b003. This breaks the rocm-device-libs build, so it should not ship in the release.
c09c1b2
to
2cb3798
Compare
@arsenm (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
This reverts commit 6e0b003.
This breaks the rocm-device-libs build, so it should not ship in the release.