Skip to content

Commit 3a7ecf5

Browse files
committed
[msan] Support vst{2,3,4}_lane instructions
This generalizes MSan's Arm NEON vst support to include the lane-specific variants (vst{2,3,4}_lane). It also updates the test from llvm#100645.
1 parent c3f7f28 commit 3a7ecf5

File tree

2 files changed

+126
-166
lines changed

2 files changed

+126
-166
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38733873
setOriginForNaryOp(I);
38743874
}
38753875

3876-
/// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
3876+
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
3877+
/// and vst{2,3,4}lane).
38773878
///
38783879
/// Arm NEON vector store intrinsics have the output address (pointer) as the
3879-
/// last argument, with the initial arguments being the inputs. They return
3880-
/// void.
3880+
/// last argument, with the initial arguments being the inputs (and lane
3881+
/// number for vst{2,3,4}lane). They return void.
38813882
///
38823883
/// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
38833884
/// abcdabcdabcdabcd... into *outP
38843885
/// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
38853886
/// writes aaaa...bbbb...cccc...dddd... into *outP
3887+
/// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
38863888
/// These instructions can all be instrumented with essentially the same
38873889
/// MSan logic, simply by applying the corresponding intrinsic to the shadow.
3888-
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
3890+
void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
38893891
IRBuilder<> IRB(&I);
38903892

38913893
// Don't use getNumOperands() because it includes the callee
38923894
int numArgOperands = I.arg_size();
3893-
assert(numArgOperands >= 1);
38943895

3895-
// The last arg operand is the output
3896+
// The last arg operand is the output (pointer)
3897+
assert(numArgOperands >= 1);
38963898
Value *Addr = I.getArgOperand(numArgOperands - 1);
38973899
assert(Addr->getType()->isPointerTy());
3900+
unsigned int skipTrailingOperands = 1;
38983901

38993902
if (ClCheckAccessAddress)
39003903
insertShadowCheck(Addr, &I);
39013904

3902-
SmallVector<Value *, 8> Shadows;
3903-
// Every arg operand, other than the last one, is an input vector
3904-
for (int i = 0; i < numArgOperands - 1; i++) {
3905+
// Second-last operand is the lane number (for vst{2,3,4}lane)
3906+
if (useLane) {
3907+
skipTrailingOperands ++;
3908+
assert(numArgOperands >= (int)skipTrailingOperands);
3909+
assert(isa<IntegerType>(I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
3910+
}
3911+
3912+
SmallVector<Value *, 8> ShadowArgs;
3913+
// All the initial operands are the inputs
3914+
for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
39053915
assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
39063916
Value *Shadow = getShadow(&I, i);
3907-
Shadows.append(1, Shadow);
3917+
ShadowArgs.append(1, Shadow);
39083918
}
39093919

39103920
// MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3931,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39213931
FixedVectorType *OutputVectorTy = FixedVectorType::get(
39223932
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
39233933
cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
3924-
(numArgOperands - 1));
3934+
(numArgOperands - skipTrailingOperands));
39253935
Type *OutputShadowTy = getShadowTy(OutputVectorTy);
39263936

3937+
if (useLane)
3938+
ShadowArgs.append(1, I.getArgOperand(numArgOperands - skipTrailingOperands));
3939+
39273940
Value *OutputShadowPtr, *OutputOriginPtr;
39283941
// AArch64 NEON does not need alignment (unless OS requires it)
39293942
std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
39303943
Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
3931-
Shadows.append(1, OutputShadowPtr);
3944+
ShadowArgs.append(1, OutputShadowPtr);
39323945

3933-
// CreateIntrinsic will select the correct (integer) type for the
3934-
// intrinsic; the original instruction I may have either integer- or
3935-
// float-type inputs.
39363946
CallInst *CI =
3937-
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
3947+
IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
39383948
setShadow(&I, CI);
39393949

39403950
if (MS.TrackOrigins) {
39413951
// TODO: if we modelled the vst* instruction more precisely, we could
39423952
// more accurately track the origins (e.g., if both inputs are
39433953
// uninitialized for vst2, we currently blame the second input, even
39443954
// though part of the output depends only on the first input).
3955+
//
3956+
// This is particularly imprecise for vst{2,3,4}lane, since only one
3957+
// lane of each input is actually copied to the output.
39453958
OriginCombiner OC(this, IRB);
3946-
for (int i = 0; i < numArgOperands - 1; i++)
3959+
for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
39473960
OC.Add(I.getArgOperand(i));
39483961

39493962
const DataLayout &DL = F.getDataLayout();
@@ -4299,7 +4312,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
42994312
case Intrinsic::aarch64_neon_st2:
43004313
case Intrinsic::aarch64_neon_st3:
43014314
case Intrinsic::aarch64_neon_st4: {
4302-
handleNEONVectorStoreIntrinsic(I);
4315+
handleNEONVectorStoreIntrinsic(I, false);
4316+
break;
4317+
}
4318+
4319+
case Intrinsic::aarch64_neon_st2lane:
4320+
case Intrinsic::aarch64_neon_st3lane:
4321+
case Intrinsic::aarch64_neon_st4lane: {
4322+
handleNEONVectorStoreIntrinsic(I, true);
43034323
break;
43044324
}
43054325

0 commit comments

Comments
 (0)