Skip to content

Commit cb5ec37

Browse files
authored
[msan] Support vst{2,3,4}_lane instructions (#101215)
This generalizes MSan's Arm NEON vst support to include the lane-specific variants (vst{2,3,4}_lane). It also updates the test from #100645.
1 parent 7752fec commit cb5ec37

File tree

2 files changed

+128
-166
lines changed

2 files changed

+128
-166
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3873,38 +3873,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38733873
setOriginForNaryOp(I);
38743874
}
38753875

3876-
/// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
3876+
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
3877+
/// and vst{2,3,4}lane).
38773878
///
38783879
/// Arm NEON vector store intrinsics have the output address (pointer) as the
3879-
/// last argument, with the initial arguments being the inputs. They return
3880-
/// void.
3880+
/// last argument, with the initial arguments being the inputs (and lane
3881+
/// number for vst{2,3,4}lane). They return void.
38813882
///
38823883
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
38833884
/// abcdabcdabcdabcd... into *outP
38843885
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
38853886
/// writes aaaa...bbbb...cccc...dddd... into *outP
3887+
/// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
38863888
/// These instructions can all be instrumented with essentially the same
38873889
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
3888-
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
3890+
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
38893891
IRBuilder<> IRB(&I);
38903892

38913893
// Don't use getNumOperands() because it includes the callee
38923894
int numArgOperands = I.arg_size();
3893-
assert(numArgOperands >= 1);
38943895

3895-
// The last arg operand is the output
3896+
// The last arg operand is the output (pointer)
3897+
assert(numArgOperands >= 1);
38963898
Value *Addr = I.getArgOperand(numArgOperands - 1);
38973899
assert(Addr->getType()->isPointerTy());
3900+
int skipTrailingOperands = 1;
38983901

38993902
if (ClCheckAccessAddress)
39003903
insertShadowCheck(Addr, &I);
39013904

3902-
SmallVector<Value *, 8> Shadows;
3903-
// Every arg operand, other than the last one, is an input vector
3904-
for (int i = 0; i < numArgOperands - 1; i++) {
3905+
// Second-last operand is the lane number (for vst{2,3,4}lane)
3906+
if (useLane) {
3907+
skipTrailingOperands++;
3908+
assert(numArgOperands >= static_cast<int>(skipTrailingOperands));
3909+
assert(isa<IntegerType>(
3910+
I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
3911+
}
3912+
3913+
SmallVector<Value *, 8> ShadowArgs;
3914+
// All the initial operands are the inputs
3915+
for (int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
39053916
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
39063917
Value *Shadow = getShadow(&I, i);
3907-
Shadows.append(1, Shadow);
3918+
ShadowArgs.append(1, Shadow);
39083919
}
39093920

39103921
// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3932,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39213932
FixedVectorType *OutputVectorTy = FixedVectorType::get(
39223933
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
39233934
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
3924-
(numArgOperands - 1));
3935+
(numArgOperands - skipTrailingOperands));
39253936
Type *OutputShadowTy = getShadowTy(OutputVectorTy);
39263937

3938+
if (useLane)
3939+
ShadowArgs.append(1,
3940+
I.getArgOperand(numArgOperands - skipTrailingOperands));
3941+
39273942
Value *OutputShadowPtr, *OutputOriginPtr;
39283943
// AArch64 NEON does not need alignment (unless OS requires it)
39293944
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
39303945
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
3931-
Shadows.append(1, OutputShadowPtr);
3946+
ShadowArgs.append(1, OutputShadowPtr);
39323947

3933-
// CreateIntrinsic will select the correct (integer) type for the
3934-
// intrinsic; the original instruction I may have either integer- or
3935-
// float-type inputs.
39363948
CallInst *CI =
3937-
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
3949+
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
39383950
setShadow(&I, CI);
39393951

39403952
if (MS.TrackOrigins) {
39413953
// TODO: if we modelled the vst* instruction more precisely, we could
39423954
// more accurately track the origins (e.g., if both inputs are
39433955
// uninitialized for vst2, we currently blame the second input, even
39443956
// though part of the output depends only on the first input).
3957+
//
3958+
// This is particularly imprecise for vst{2,3,4}lane, since only one
3959+
// lane of each input is actually copied to the output.
39453960
OriginCombiner OC(this, IRB);
3946-
for (int i = 0; i < numArgOperands - 1; i++)
3961+
for (int i = 0; i < numArgOperands - skipTrailingOperands; i++)
39473962
OC.Add(I.getArgOperand(i));
39483963

39493964
const DataLayout &DL = F.getDataLayout();
@@ -4316,7 +4331,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
43164331
case Intrinsic::aarch64_neon_st2:
43174332
case Intrinsic::aarch64_neon_st3:
43184333
case Intrinsic::aarch64_neon_st4: {
4319-
handleNEONVectorStoreIntrinsic(I);
4334+
handleNEONVectorStoreIntrinsic(I, false);
4335+
break;
4336+
}
4337+
4338+
case Intrinsic::aarch64_neon_st2lane:
4339+
case Intrinsic::aarch64_neon_st3lane:
4340+
case Intrinsic::aarch64_neon_st4lane: {
4341+
handleNEONVectorStoreIntrinsic(I, true);
43204342
break;
43214343
}
43224344

0 commit comments

Comments
 (0)