@@ -3873,38 +3873,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
+  /// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
+  /// and vst{2,3,4}lane).
   ///
   /// Arm NEON vector store intrinsics have the output address (pointer) as the
-  /// last argument, with the initial arguments being the inputs. They return
-  /// void.
+  /// last argument, with the initial arguments being the inputs (and lane
+  /// number for vst{2,3,4}lane). They return void.
   ///
   /// - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
   ///   abcdabcdabcdabcd... into *outP
   /// - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
   ///   writes aaaa...bbbb...cccc...dddd... into *outP
+  /// - st4lane has arguments of (inA, inB, inC, inD, lane, outP)
   /// These instructions can all be instrumented with essentially the same
   /// MSan logic, simply by applying the corresponding intrinsic to the shadow.
-  void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
+  void handleNEONVectorStoreIntrinsic(IntrinsicInst &I, bool useLane) {
     IRBuilder<> IRB(&I);
 
     // Don't use getNumOperands() because it includes the callee
     int numArgOperands = I.arg_size();
-    assert(numArgOperands >= 1);
 
-    // The last arg operand is the output
+    // The last arg operand is the output (pointer)
+    assert(numArgOperands >= 1);
     Value *Addr = I.getArgOperand(numArgOperands - 1);
     assert(Addr->getType()->isPointerTy());
+    int skipTrailingOperands = 1;
 
     if (ClCheckAccessAddress)
       insertShadowCheck(Addr, &I);
 
-    SmallVector<Value *, 8> Shadows;
-    // Every arg operand, other than the last one, is an input vector
-    for (int i = 0; i < numArgOperands - 1; i++) {
+    // Second-last operand is the lane number (for vst{2,3,4}lane)
+    if (useLane) {
+      skipTrailingOperands++;
+      assert(numArgOperands >= static_cast<int>(skipTrailingOperands));
+      assert(isa<IntegerType>(
+          I.getArgOperand(numArgOperands - skipTrailingOperands)->getType()));
+    }
+
+    SmallVector<Value *, 8> ShadowArgs;
+    // All the initial operands are the inputs
+    for (int i = 0; i < numArgOperands - skipTrailingOperands; i++) {
       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
       Value *Shadow = getShadow(&I, i);
-      Shadows.append(1, Shadow);
+      ShadowArgs.append(1, Shadow);
     }
 
     // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
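
For illustration (not part of the patch): the lane-store semantics described in the doc comment above, sketched with the ACLE intrinsics from <arm_neon.h> that lower to these LLVM intrinsics on AArch64. The function names here are made up for the example.

#include <arm_neon.h>

// vst4q_lane_f32 lowers to @llvm.aarch64.neon.st4lane: it writes exactly one
// element (here lane 2) from each of the four input vectors, interleaved,
// i.e. out[0..3] = {v.val[0][2], v.val[1][2], v.val[2][2], v.val[3][2]}.
// Only 16 of the 64 input bytes reach memory.
void store_lane2(float32x4x4_t v, float32_t *out) {
  vst4q_lane_f32(out, v, 2);
}

// By contrast, vst4q_f32 (st4) writes all 64 bytes, fully interleaved
// (abcdabcd...), and vst1q_f32_x4 (st1_x4) writes them non-interleaved
// (aaaabbbbccccdddd).
void store_all(float32x4x4_t v, float32_t *out) {
  vst4q_f32(out, v);
}
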
@@ -3921,29 +3932,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     FixedVectorType *OutputVectorTy = FixedVectorType::get(
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
-            (numArgOperands - 1));
+            (numArgOperands - skipTrailingOperands));
     Type *OutputShadowTy = getShadowTy(OutputVectorTy);
 
+    if (useLane)
+      ShadowArgs.append(1,
+                        I.getArgOperand(numArgOperands - skipTrailingOperands));
+
     Value *OutputShadowPtr, *OutputOriginPtr;
     // AArch64 NEON does not need alignment (unless OS requires it)
     std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
         Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
-    Shadows.append(1, OutputShadowPtr);
+    ShadowArgs.append(1, OutputShadowPtr);
 
-    // CreateIntrinsic will select the correct (integer) type for the
-    // intrinsic; the original instruction I may have either integer- or
-    // float-type inputs.
     CallInst *CI =
-        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), ShadowArgs);
     setShadow(&I, CI);
 
     if (MS.TrackOrigins) {
       // TODO: if we modelled the vst* instruction more precisely, we could
       // more accurately track the origins (e.g., if both inputs are
       // uninitialized for vst2, we currently blame the second input, even
       // though part of the output depends only on the first input).
+      //
+      // This is particularly imprecise for vst{2,3,4}lane, since only one
+      // lane of each input is actually copied to the output.
       OriginCombiner OC(this, IRB);
-      for (int i = 0; i < numArgOperands - 1; i++)
+      for (int i = 0; i < numArgOperands - skipTrailingOperands; i++)
         OC.Add(I.getArgOperand(i));
 
       const DataLayout &DL = F.getDataLayout();
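
A standalone sketch (not LLVM code) of the shadow-argument assembly performed across the two hunks above, with std::string standing in for llvm::Value* and hypothetical shadow()/shadowPtr() markers for what getShadow and getShadowOriginPtr produce:

#include <cassert>
#include <string>
#include <vector>

std::vector<std::string> buildShadowArgs(const std::vector<std::string> &Args,
                                         bool useLane) {
  // For vst{2,3,4}lane the trailing operands are (lane, outP);
  // otherwise the only trailing operand is outP.
  int skipTrailingOperands = useLane ? 2 : 1;
  assert((int)Args.size() >= skipTrailingOperands);

  std::vector<std::string> ShadowArgs;
  // Each input vector is replaced by its shadow.
  for (int i = 0; i < (int)Args.size() - skipTrailingOperands; i++)
    ShadowArgs.push_back("shadow(" + Args[i] + ")");
  // The lane number, if present, is passed through unchanged.
  if (useLane)
    ShadowArgs.push_back(Args[Args.size() - skipTrailingOperands]);
  // The output pointer is replaced by the shadow address of *outP.
  ShadowArgs.push_back("shadowPtr(" + Args.back() + ")");
  return ShadowArgs;
}

So buildShadowArgs({"inA", "inB", "inC", "inD", "lane", "outP"}, true) yields {shadow(inA), ..., shadow(inD), lane, shadowPtr(outP)}, which CreateIntrinsic then feeds to the same st4lane intrinsic: the shadow store mirrors the original store's interleaving and lane selection exactly.
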
@@ -4316,7 +4331,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::aarch64_neon_st2:
     case Intrinsic::aarch64_neon_st3:
     case Intrinsic::aarch64_neon_st4: {
-      handleNEONVectorStoreIntrinsic(I);
+      handleNEONVectorStoreIntrinsic(I, false);
+      break;
+    }
+
+    case Intrinsic::aarch64_neon_st2lane:
+    case Intrinsic::aarch64_neon_st3lane:
+    case Intrinsic::aarch64_neon_st4lane: {
+      handleNEONVectorStoreIntrinsic(I, true);
       break;
     }
 
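
A hypothetical end-to-end repro of the new st{2,3,4}lane path (not a test from the patch; names and the exact report are illustrative). Assumes an AArch64 target compiled with -fsanitize=memory:

#include <arm_neon.h>
#include <sanitizer/msan_interface.h>

int main() {
  float poison;                       // deliberately uninitialized
  float32x2_t a = vdup_n_f32(poison); // both lanes of a inherit its shadow
  float32x2_t b = vdup_n_f32(1.0f);   // b is fully initialized
  float32x2x2_t v = {{a, b}};

  float out[2];
  // Lowers to @llvm.aarch64.neon.st2lane; with this patch the handler mirrors
  // the store on the shadows, so out[0] = a[1] stays poisoned while
  // out[1] = b[1] is marked initialized.
  vst2_lane_f32(out, v, 1);

  // Expected to report a use of uninitialized memory for out[0].
  __msan_check_mem_is_initialized(out, sizeof(out));
  return 0;
}
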