Skip to content

Commit ab69571

Browse files
spallkostasalv
authored and committed
[HLSL] Move where ZExt happens in 'EmitStoreThroughExtVectorComponentLValue' to handle bug with hlsl boolean vector swizzles (llvm#140627)
In `EmitStoreThroughExtVectorComponentLValue`, move the code that zero-extends (ZExt) the source value when the destination scalar type is wider than the source scalar type to the top of the function, so that the extension is applied on every code path. The previous code missed this case:

```
bool4 b = true.xxxx;
b.xyz = false.xxx;
```

which led to a bad shufflevector. Closes llvm#140564
1 parent f323eb4 commit ab69571

File tree

2 files changed

+31
-16
lines changed

2 files changed

+31
-16
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2694,14 +2694,20 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst,
26942694

26952695
void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
26962696
LValue Dst) {
2697+
llvm::Value *SrcVal = Src.getScalarVal();
2698+
Address DstAddr = Dst.getExtVectorAddress();
2699+
if (DstAddr.getElementType()->getScalarSizeInBits() >
2700+
SrcVal->getType()->getScalarSizeInBits())
2701+
SrcVal = Builder.CreateZExt(
2702+
SrcVal, convertTypeForLoadStore(Dst.getType(), SrcVal->getType()));
2703+
26972704
// HLSL allows storing to scalar values through ExtVector component LValues.
26982705
// To support this we need to handle the case where the destination address is
26992706
// a scalar.
2700-
Address DstAddr = Dst.getExtVectorAddress();
27012707
if (!DstAddr.getElementType()->isVectorTy()) {
27022708
assert(!Dst.getType()->isVectorType() &&
27032709
"this should only occur for non-vector l-values");
2704-
Builder.CreateStore(Src.getScalarVal(), DstAddr, Dst.isVolatileQualified());
2710+
Builder.CreateStore(SrcVal, DstAddr, Dst.isVolatileQualified());
27052711
return;
27062712
}
27072713

@@ -2722,11 +2728,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27222728
for (unsigned i = 0; i != NumSrcElts; ++i)
27232729
Mask[getAccessedFieldNo(i, Elts)] = i;
27242730

2725-
llvm::Value *SrcVal = Src.getScalarVal();
2726-
if (VecTy->getScalarSizeInBits() >
2727-
SrcVal->getType()->getScalarSizeInBits())
2728-
SrcVal = Builder.CreateZExt(SrcVal, VecTy);
2729-
27302731
Vec = Builder.CreateShuffleVector(SrcVal, Mask);
27312732
} else if (NumDstElts > NumSrcElts) {
27322733
// Extend the source vector to the same length and then shuffle it
@@ -2737,8 +2738,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27372738
for (unsigned i = 0; i != NumSrcElts; ++i)
27382739
ExtMask.push_back(i);
27392740
ExtMask.resize(NumDstElts, -1);
2740-
llvm::Value *ExtSrcVal =
2741-
Builder.CreateShuffleVector(Src.getScalarVal(), ExtMask);
2741+
llvm::Value *ExtSrcVal = Builder.CreateShuffleVector(SrcVal, ExtMask);
27422742
// build identity
27432743
SmallVector<int, 4> Mask;
27442744
for (unsigned i = 0; i != NumDstElts; ++i)
@@ -2764,10 +2764,6 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src,
27642764
unsigned InIdx = getAccessedFieldNo(0, Elts);
27652765
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
27662766

2767-
llvm::Value *SrcVal = Src.getScalarVal();
2768-
if (VecTy->getScalarSizeInBits() > SrcVal->getType()->getScalarSizeInBits())
2769-
SrcVal = Builder.CreateZExt(SrcVal, VecTy->getScalarType());
2770-
27712767
Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt);
27722768
}
27732769

clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ int AssignInt(int V){
233233

234234
// CHECK: lor.end:
235235
// CHECK-NEXT: [[H:%.*]] = phi i1 [ true, %entry ], [ [[G]], %lor.rhs ]
236-
// CHECK-NEXT: store i1 [[H]], ptr [[XAddr]], align 4
236+
// CHECK-NEXT: [[J:%.*]] = zext i1 [[H]] to i32
237+
// CHECK-NEXT: store i32 [[J]], ptr [[XAddr]], align 4
237238
// CHECK-NEXT: [[I:%.*]] = load i32, ptr [[XAddr]], align 4
238239
// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[I]] to i1
239240
// CHECK-NEXT: ret i1 [[LoadV]]
@@ -257,8 +258,8 @@ bool AssignBool(bool V) {
257258
// CHECK-NEXT: store <2 x i32> [[A]], ptr [[X]], align 8
258259
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
259260
// CHECK-NEXT: [[LV1:%.*]] = trunc i32 [[B]] to i1
260-
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
261261
// CHECK-NEXT: [[D:%.*]] = zext i1 [[LV1]] to i32
262+
// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[X]], align 8
262263
// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i32 1
263264
// CHECK-NEXT: store <2 x i32> [[E]], ptr [[X]], align 8
264265
// CHECK-NEXT: ret void
@@ -275,8 +276,8 @@ void AssignBool2(bool V) {
275276
// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[X]], align 8
276277
// CHECK-NEXT: [[Z:%.*]] = load <2 x i32>, ptr [[VAddr]], align 8
277278
// CHECK-NEXT: [[LV:%.*]] = trunc <2 x i32> [[Z]] to <2 x i1>
278-
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
279279
// CHECK-NEXT: [[B:%.*]] = zext <2 x i1> [[LV]] to <2 x i32>
280+
// CHECK-NEXT: [[A:%.*]] = load <2 x i32>, ptr [[X]], align 8
280281
// CHECK-NEXT: [[C:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
281282
// CHECK-NEXT: store <2 x i32> [[C]], ptr [[X]], align 8
282283
// CHECK-NEXT: ret void
@@ -302,3 +303,21 @@ bool2 AccessBools() {
302303
bool4 X = true.xxxx;
303304
return X.zw;
304305
}
306+
307+
// CHECK-LABEL: define void {{.*}}BoolSizeMismatch{{.*}}
308+
// CHECK: [[B:%.*]] = alloca <4 x i32>, align 16
309+
// CHECK-NEXT: [[Tmp:%.*]] = alloca <1 x i32>, align 4
310+
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[B]], align 16
311+
// CHECK-NEXT: store <1 x i32> zeroinitializer, ptr [[Tmp]], align 4
312+
// CHECK-NEXT: [[L0:%.*]] = load <1 x i32>, ptr [[Tmp]], align 4
313+
// CHECK-NEXT: [[L1:%.*]] = shufflevector <1 x i32> [[L0]], <1 x i32> poison, <3 x i32> zeroinitializer
314+
// CHECK-NEXT: [[TruncV:%.*]] = trunc <3 x i32> [[L1]] to <3 x i1>
315+
// CHECK-NEXT: [[L2:%.*]] = zext <3 x i1> [[TruncV]] to <3 x i32>
316+
// CHECK-NEXT: [[L3:%.*]] = load <4 x i32>, ptr [[B]], align 16
317+
// CHECK-NEXT: [[L4:%.*]] = shufflevector <3 x i32> [[L2]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
318+
// CHECK-NEXT: [[L5:%.*]] = shufflevector <4 x i32> [[L3]], <4 x i32> [[L4]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
319+
// CHECK-NEXT: store <4 x i32> [[L5]], ptr [[B]], align 16
320+
void BoolSizeMismatch() {
321+
bool4 B = {true,true,true,true};
322+
B.xyz = false.xxx;
323+
}

0 commit comments

Comments
 (0)