@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
+  /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
+  /// and vst{2,3,4}lane).
   ///
   /// Arm NEON vector store intrinsics have the output address (pointer) as the
-  /// last argument, with the initial arguments being the inputs. They return
-  /// void.
+  /// last argument, with the initial arguments being the inputs (and lane
+  /// number for vst{2,3,4}lane). They return void.
   ///
   /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
   ///   abcdabcdabcdabcd... into *outP
   /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
   ///   writes aaaa...bbbb...cccc...dddd... into *outP
+  /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
   /// These instructions can all be instrumented with essentially the same
   /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
-  void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+  void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
     IRBuilder<> IRB(&I);
 
     // Don't use getNumOperands() because it includes the callee
     int numArgOperands = I.arg_size();
-    assert(numArgOperands >= 1);
 
-    // The last arg operand is the output
+    // The last arg operand is the output (pointer)
+    assert(numArgOperands >= 1);
     Value *Addr = I.getArgOperand(numArgOperands - 1);
     assert(Addr->getType()->isPointerTy());
+    unsigned int skipTrailingOperands = 1;
 
     if (ClCheckAccessAddress)
      insertShadowCheck(Addr, &I);
 
-    SmallVector<Value *, 8> Shadows;
-    // Every arg operand, other than the last one, is an input vector
-    for (int i = 0; i < numArgOperands - 1; i++) {
+    // Second-last operand is the lane number (for vst{2,3,4}lane)
+    if (useLane) {
+      skipTrailingOperands++;
+      assert(numArgOperands >= (int)skipTrailingOperands);
+      assert(isa<IntegerType>(I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
+    }
+
+    SmallVector<Value *, 8> ShadowArgs;
+    // All the initial operands are the inputs
+    for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
       Value *Shadow = getShadow(&I, i);
-      Shadows.append(1, Shadow);
+      ShadowArgs.append(1, Shadow);
     }
 
     // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3931,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     FixedVectorType *OutputVectorTy = FixedVectorType::get(
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
-            (numArgOperands - 1));
+            (numArgOperands - skipTrailingOperands));
     Type *OutputShadowTy = getShadowTy(OutputVectorTy);
 
+    if (useLane)
+      ShadowArgs.append(1, I.getArgOperand(numArgOperands - skipTrailingOperands));
+
     Value *OutputShadowPtr, *OutputOriginPtr;
     // AArch64 NEON does not need alignment (unless OS requires it)
     std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
         Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
-    Shadows.append(1, OutputShadowPtr);
+    ShadowArgs.append(1, OutputShadowPtr);
 
-    // CreateIntrinsic will select the correct (integer) type for the
-    // intrinsic; the original instruction I may have either integer- or
-    // float-type inputs.
     CallInst *CI =
-        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
     setShadow(&I, CI);
 
     if (MS.TrackOrigins) {
       // TODO: if we modelled the vst* instruction more precisely, we could
       // more accurately track the origins (e.g., if both inputs are
       // uninitialized for vst2, we currently blame the second input, even
       // though part of the output depends only on the first input).
+      //
+      // This is particularly imprecise for vst{2,3,4}lane, since only one
+      // lane of each input is actually copied to the output.
       OriginCombiner OC(this, IRB);
-      for (int i = 0; i < numArgOperands - 1; i++)
+      for (unsigned int i = 0; i < numArgOperands - skipTrailingOperands; i++)
        OC.Add(I.getArgOperand(i));
 
      const DataLayout &DL = F.getDataLayout();
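
For readers unfamiliar with these intrinsics, here is a minimal sketch (not part of the patch) of the three store shapes the doc comment above describes, written with the C-level NEON intrinsics from <arm_neon.h> that typically lower to the aarch64.neon.st4, st1x4, and st4lane LLVM intrinsics this handler covers; the function and pointer names are illustrative only:

#include <arm_neon.h>

// Illustrative only: the three store shapes handleNEONVectorStoreIntrinsic handles.
void neon_store_shapes(uint8x8x4_t v, uint8_t *interleaved,
                       uint8_t *contiguous, uint8_t *one_lane) {
  // st4: interleaved, writes abcdabcdabcd... (32 bytes) into *interleaved
  vst4_u8(interleaved, v);
  // st1_x4: non-interleaved, writes aaaa...bbbb...cccc...dddd... (32 bytes)
  vst1_u8_x4(contiguous, v);
  // st4lane: writes only lane 3 of each of the four inputs (4 bytes total);
  // at the LLVM IR level the lane index becomes the second-to-last operand,
  // just before the output pointer, which is what useLane accounts for
  vst4_lane_u8(one_lane, v, 3);
}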
@@ -4299,7 +4312,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::aarch64_neon_st2:
     case Intrinsic::aarch64_neon_st3:
     case Intrinsic::aarch64_neon_st4: {
-      handleNEONVectorStoreIntrinsic(I);
+      handleNEONVectorStoreIntrinsic(I, false);
+      break;
+    }
+
+    case Intrinsic::aarch64_neon_st2lane:
+    case Intrinsic::aarch64_neon_st3lane:
+    case Intrinsic::aarch64_neon_st4lane: {
+      handleNEONVectorStoreIntrinsic(I, true);
       break;
     }
 
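
To make the effect of the new dispatch cases concrete, here is a hedged, hypothetical reproducer (none of these names appear in the patch) of the behaviour the st{2,3,4}lane handling enables, assuming a build with -fsanitize=memory on AArch64:

#include <arm_neon.h>

// Hypothetical reproducer (not from the patch): the vst4_lane_u8 store below
// is now instrumented, so the shadow of the uninitialized inputs propagates
// into `out`, and the branch on out[0] should be reported by MSan as a use
// of an uninitialized value.
int branches_on_lane_store(void) {
  uint8x8x4_t in;            // deliberately left uninitialized
  uint8_t out[4];
  vst4_lane_u8(out, in, 0);  // writes lane 0 of each input into out[0..3]
  if (out[0] == 42)          // MSan check fires here
    return 1;
  return 0;
}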