-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[HLSL] Make memory representation of boolean vectors in HLSL, vectors of i32. Add support for boolean swizzling. #123977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
50f8b16
b231f4e
dd7e459
6734eed
d9a4777
e0638d1
2e534a5
793541d
1fe0951
505b17b
0e6da2b
fbaf536
3d22ed0
7a0ccdd
d3904c4
e947d2c
5b364cb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -410,6 +410,12 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, | |
VectorTypeBits.NumElements = nElements; | ||
} | ||
|
||
bool Type::isPackedVectorBoolType(const ASTContext &ctx) const { | ||
if (ctx.getLangOpts().HLSL) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be a dumb suggestion, but is there a way to just check if the mem representation is i32 or i1? HLSL is probably the only language mode that needs this distinction, but it feels like this shouldn't have a lang opt toggle based on the function name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code to state that a bool vector should be a vector of i32s isn't accessible here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would make for slightly better abstraction to add a |
||
return false; | ||
return isExtVectorBoolType(); | ||
} | ||
|
||
BitIntType::BitIntType(bool IsUnsigned, unsigned NumBits) | ||
: Type(BitInt, QualType{}, TypeDependence::None), IsUnsigned(IsUnsigned), | ||
NumBits(NumBits) {} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1985,7 +1985,7 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, | |
|
||
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) { | ||
// Boolean vectors use `iN` as storage type. | ||
if (ClangVecTy->isExtVectorBoolType()) { | ||
if (ClangVecTy->isPackedVectorBoolType(getContext())) { | ||
llvm::Type *ValTy = ConvertType(Ty); | ||
unsigned ValNumElems = | ||
cast<llvm::FixedVectorType>(ValTy)->getNumElements(); | ||
|
@@ -2064,6 +2064,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { | |
|
||
if (Ty->isExtVectorBoolType()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you replaced a bunch of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My intention was to replace calls to 'isExtVectorBoolType' with 'isPackedVectorBoolType' anywhere we want an hlsl boolean vector to follow the normal handling path for vectors; Hopefully reviews will verify I got this right. |
||
llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType()); | ||
if (getLangOpts().HLSL) | ||
spall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return Builder.CreateZExt(Value, StoreTy); | ||
|
||
// Expand to the memory bit width. | ||
unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits(); | ||
// <N x i1> --> <P x i1>. | ||
|
@@ -2081,6 +2084,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { | |
llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) { | ||
if (Ty->isExtVectorBoolType()) { | ||
const auto *RawIntTy = Value->getType(); | ||
if (getLangOpts().HLSL) | ||
return Builder.CreateTrunc(Value, ConvertType(Ty), "loadedv"); | ||
spall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Bitcast iP --> <P x i1>. | ||
auto *PaddedVecTy = llvm::FixedVectorType::get( | ||
Builder.getInt1Ty(), RawIntTy->getPrimitiveSizeInBits()); | ||
|
@@ -2152,7 +2158,8 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, | |
if (auto *VecTy = dyn_cast<llvm::FixedVectorType>(SrcTy)) { | ||
auto *NewVecTy = | ||
CGM.getABIInfo().getOptimalVectorMemoryType(VecTy, getLangOpts()); | ||
if (!ClangVecTy->isExtVectorBoolType() && VecTy != NewVecTy) { | ||
if (!ClangVecTy->isPackedVectorBoolType(getContext()) && | ||
VecTy != NewVecTy) { | ||
SmallVector<int, 16> Mask(NewVecTy->getNumElements(), -1); | ||
std::iota(Mask.begin(), Mask.begin() + VecTy->getNumElements(), 0); | ||
Value = Builder.CreateShuffleVector(Value, Mask, "extractVec"); | ||
|
@@ -2343,7 +2350,13 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { | |
if (!ExprVT) { | ||
unsigned InIdx = getAccessedFieldNo(0, Elts); | ||
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); | ||
return RValue::get(Builder.CreateExtractElement(Vec, Elt)); | ||
|
||
llvm::Value *Element = Builder.CreateExtractElement(Vec, Elt); | ||
|
||
if (getLangOpts().HLSL && LV.getType()->isBooleanType()) | ||
Element = Builder.CreateTrunc(Element, ConvertType(LV.getType())); | ||
|
||
return RValue::get(Element); | ||
} | ||
|
||
// Always use shuffle vector to try to retain the original program structure | ||
|
@@ -2354,6 +2367,10 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) { | |
Mask.push_back(getAccessedFieldNo(i, Elts)); | ||
|
||
Vec = Builder.CreateShuffleVector(Vec, Mask); | ||
|
||
if (getLangOpts().HLSL && LV.getType()->isExtVectorBoolType()) | ||
Vec = EmitFromMemory(Vec, LV.getType()); | ||
|
||
return RValue::get(Vec); | ||
} | ||
|
||
|
@@ -2407,6 +2424,12 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, | |
// Read/modify/write the vector, inserting the new element. | ||
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(), | ||
Dst.isVolatileQualified()); | ||
llvm::Type *OldVecTy = Vec->getType(); | ||
if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType()) | ||
|
||
Vec = | ||
Builder.CreateTrunc(Vec, ConvertType(Dst.getType()), "truncboolv"); | ||
|
||
auto *IRStoreTy = dyn_cast<llvm::IntegerType>(Vec->getType()); | ||
if (IRStoreTy) { | ||
auto *IRVecTy = llvm::FixedVectorType::get( | ||
|
@@ -2420,6 +2443,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, | |
// <N x i1> --> <iN>. | ||
Vec = Builder.CreateBitCast(Vec, IRStoreTy); | ||
} | ||
|
||
if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType()) | ||
Vec = Builder.CreateZExt(Vec, OldVecTy); | ||
|
||
Builder.CreateStore(Vec, Dst.getVectorAddress(), | ||
Dst.isVolatileQualified()); | ||
return; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1984,6 +1984,15 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, | |
return Res; | ||
} | ||
|
||
// In HLSL bool vectors are stored in memory as a vector of i32 | ||
if (destType->isExtVectorBoolType() && CGM.getContext().getLangOpts().HLSL) { | ||
spall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
llvm::Type *boolVecTy = CGM.getTypes().ConvertTypeForMem(destType); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need special handling for non-HLSL ext-bool-vector types? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't believe so. This code is necessary for HLSL because the constant 'C' is a <2 x i1>, but HLSL needs it to be a <2 x i32>, hence the zero extension. For non-HLSL vectors the expected form would be <2 x i1>, so nothing needs to be done. |
||
llvm::Constant *Res = llvm::ConstantFoldCastOperand( | ||
llvm::Instruction::ZExt, C, boolVecTy, CGM.getDataLayout()); | ||
assert(Res && "Constant folding must succeed"); | ||
return Res; | ||
} | ||
|
||
if (destType->isBitIntType()) { | ||
ConstantAggregateBuilder Builder(CGM); | ||
llvm::Type *LoadStoreTy = CGM.getTypes().convertTypeForLoadStore(destType); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,6 +112,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { | |
// Check for the boolean vector case. | ||
if (T->isExtVectorBoolType()) { | ||
auto *FixedVT = cast<llvm::FixedVectorType>(R); | ||
|
||
if (Context.getLangOpts().HLSL) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could use the |
||
llvm::Type *IRElemTy = ConvertTypeForMem(Context.BoolTy); | ||
return llvm::FixedVectorType::get(IRElemTy, FixedVT->getNumElements()); | ||
} | ||
|
||
// Pad to at least one byte. | ||
uint64_t BytePadded = std::max<uint64_t>(FixedVT->getNumElements(), 8); | ||
return llvm::IntegerType::get(FixedVT->getContext(), BytePadded); | ||
|
@@ -648,7 +654,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { | |
case Type::Vector: { | ||
const auto *VT = cast<VectorType>(Ty); | ||
// An ext_vector_type of Bool is really a vector of bits. | ||
llvm::Type *IRElemTy = VT->isExtVectorBoolType() | ||
llvm::Type *IRElemTy = VT->isPackedVectorBoolType(Context) | ||
? llvm::Type::getInt1Ty(getLLVMContext()) | ||
: ConvertType(VT->getElementType()); | ||
ResultType = llvm::FixedVectorType::get(IRElemTy, VT->getNumElements()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s | ||
|
||
// CHECK: %struct.S = type { <2 x i32>, float } | ||
// CHECK: [[ConstS:@.*]] = private unnamed_addr constant %struct.S { <2 x i32> splat (i32 1), float 1.000000e+00 }, align 8 | ||
// CHECK: [[ConstArr:.*]] = private unnamed_addr constant [2 x <2 x i32>] [<2 x i32> splat (i32 1), <2 x i32> zeroinitializer], align 8 | ||
|
||
struct S { | ||
bool2 bv; | ||
float f; | ||
}; | ||
|
||
// CHECK-LABEL: define noundef i1 {{.*}}fn1{{.*}} | ||
// CHECK: [[B:%.*]] = alloca <2 x i32>, align 8 | ||
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[B]], align 8 | ||
spall marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// CHECK-NEXT: [[BoolVec:%.*]] = load <2 x i32>, ptr [[B]], align 8 | ||
// CHECK-NEXT: [[L:%.*]] = trunc <2 x i32> [[BoolVec:%.*]] to <2 x i1> | ||
// CHECK-NEXT: [[VecExt:%.*]] = extractelement <2 x i1> [[L]], i32 0 | ||
// CHECK-NEXT: ret i1 [[VecExt]] | ||
bool fn1() { | ||
bool2 B = {true,true}; | ||
return B[0]; | ||
} | ||
|
||
// CHECK-LABEL: define noundef <2 x i1> {{.*}}fn2{{.*}} | ||
// CHECK: [[VAddr:%.*]] = alloca i32, align 4 | ||
// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 8 | ||
// CHECK-NEXT: [[StoreV:%.*]] = zext i1 {{.*}} to i32 | ||
// CHECK-NEXT: store i32 [[StoreV]], ptr [[VAddr]], align 4 | ||
// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[VAddr]], align 4 | ||
// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[L]] to i1 | ||
// CHECK-NEXT: [[Vec:%.*]] = insertelement <2 x i1> poison, i1 [[LoadV]], i32 0 | ||
// CHECK-NEXT: [[Vec1:%.*]] = insertelement <2 x i1> [[Vec]], i1 true, i32 1 | ||
// CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[Vec1]] to <2 x i32> | ||
// CHECK-NEXT: store <2 x i32> [[Z]], ptr [[A]], align 8 | ||
// CHECK-NEXT: [[LoadBV:%.*]] = load <2 x i32>, ptr [[A]], align 8 | ||
// CHECK-NEXT: [[LoadV2:%.*]] = trunc <2 x i32> [[LoadBV]] to <2 x i1> | ||
// CHECK-NEXT: ret <2 x i1> [[LoadV2]] | ||
bool2 fn2(bool V) { | ||
bool2 A = {V,true}; | ||
return A; | ||
} | ||
|
||
// CHECK-LABEL: define noundef i1 {{.*}}fn3{{.*}} | ||
// CHECK: [[s:%.*]] = alloca %struct.S, align 8 | ||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[s]], ptr align 8 [[ConstS]], i32 16, i1 false) | ||
// CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[s]], i32 0, i32 0 | ||
// CHECK-NEXT: [[LBV:%.*]] = load <2 x i32>, ptr [[BV]], align 8 | ||
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[LBV]] to <2 x i1> | ||
// CHECK-NEXT: [[VX:%.*]] = extractelement <2 x i1> [[LV]], i32 0 | ||
// CHECK-NEXT: ret i1 [[VX]] | ||
bool fn3() { | ||
S s = {{true,true}, 1.0}; | ||
return s.bv[0]; | ||
} | ||
|
||
// CHECK-LABEL: define noundef i1 {{.*}}fn4{{.*}} | ||
// CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8 | ||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 [[ConstArr]], i32 16, i1 false) | ||
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0 | ||
// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Idx]], align 8 | ||
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[L]] to <2 x i1> | ||
// CHECK-NEXT: [[VX:%.*]] = extractelement <2 x i1> [[LV]], i32 1 | ||
// CHECK-NEXT: ret i1 [[VX]] | ||
bool fn4() { | ||
bool2 Arr[2] = {{true,true}, {false,false}}; | ||
return Arr[0][1]; | ||
} |
Uh oh!
There was an error while loading. Please reload this page.