@@ -3873,11 +3873,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  /// Handle Arm NEON vector store intrinsics (vst{2,3,4}).
+  /// Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
   ///
   /// Arm NEON vector store intrinsics have the output address (pointer) as the
   /// last argument, with the initial arguments being the inputs. They return
   /// void.
+  ///
+  /// The difference between st1_x4 and st4 is that the latter interleaves the
+  /// output, e.g., st4 (A, B, C, D, P) writes abcdabcdabcdabcd... into *P,
+  /// while st1_x4 (A, B, C, D, P) writes aaaa...bbbb...cccc...dddd... into *P.
+  /// Since we apply the same intrinsic to the shadow operands, we can reuse
+  /// the same logic.
   void handleNEONVectorStoreIntrinsic(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
 
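For readers less familiar with the NEON store family, the sketch below (not part of the patch; it assumes an AArch64 target with `<arm_neon.h>`) prints the two memory layouts the new comment contrasts: st4 interleaves one lane from each input vector, while st1_x4 stores the four vectors back to back.

```cpp
#include <arm_neon.h>
#include <cstdio>

int main() {
  const float a[4] = {0, 1, 2, 3}, b[4] = {10, 11, 12, 13},
              c[4] = {20, 21, 22, 23}, d[4] = {30, 31, 32, 33};
  float32x4x4_t v;
  v.val[0] = vld1q_f32(a);
  v.val[1] = vld1q_f32(b);
  v.val[2] = vld1q_f32(c);
  v.val[3] = vld1q_f32(d);

  float interleaved[16], sequential[16];
  vst4q_f32(interleaved, v);   // st4:    a0 b0 c0 d0  a1 b1 c1 d1 ...
  vst1q_f32_x4(sequential, v); // st1_x4: a0 a1 a2 a3  b0 b1 b2 b3 ...

  for (int i = 0; i < 16; i++)
    printf("%2d: st4=%4.0f  st1_x4=%4.0f\n", i, interleaved[i], sequential[i]);
  return 0;
}
```

Either way the intrinsic writes (numArgOperands - 1) whole vectors through the pointer operand, which is why the handler can treat both families identically and let the shadow intrinsic reproduce whatever layout the original call uses.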
@@ -3892,11 +3898,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ClCheckAccessAddress)
       insertShadowCheck(Addr, &I);
 
+    SmallVector<Value *, 8> Shadows;
     // Every arg operand, other than the last one, is an input vector
-    IntrinsicInst *ShadowI = cast<IntrinsicInst>(I.clone());
     for (int i = 0; i < numArgOperands - 1; i++) {
       assert(isa<FixedVectorType>(I.getArgOperand(i)->getType()));
-      ShadowI->setArgOperand(i, getShadow(&I, i));
+      Value *Shadow = getShadow(&I, i);
+      Shadows.append(1, Shadow);
     }
 
     // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3914,13 +3921,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getElementType(),
         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements() *
             (numArgOperands - 1));
-    Type *ShadowTy = getShadowTy(OutputVectorTy);
-    Value *ShadowPtr, *OriginPtr;
+    Type *OutputShadowTy = getShadowTy(OutputVectorTy);
+
+    Value *OutputShadowPtr, *OutputOriginPtr;
     // AArch64 NEON does not need alignment (unless OS requires it)
-    std::tie(ShadowPtr, OriginPtr) =
-        getShadowOriginPtr(Addr, IRB, ShadowTy, Align(1), /*isStore*/ true);
-    ShadowI->setArgOperand(numArgOperands - 1, ShadowPtr);
-    ShadowI->insertAfter(&I);
+    std::tie(OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr(
+        Addr, IRB, OutputShadowTy, Align(1), /*isStore*/ true);
+    Shadows.append(1, OutputShadowPtr);
+
+    CallInst *CI =
+        IRB.CreateIntrinsic(IRB.getVoidTy(), I.getIntrinsicID(), Shadows);
+    setShadow(&I, CI);
 
     if (MS.TrackOrigins) {
       // TODO: if we modelled the vst* instruction more precisely, we could
@@ -3932,7 +3943,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         OC.Add(I.getArgOperand(i));
 
       const DataLayout &DL = F.getDataLayout();
-      OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy), OriginPtr);
+      OC.DoneAndStoreOrigin(DL.getTypeStoreSize(OutputVectorTy),
+                            OutputOriginPtr);
     }
   }
 
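The key mechanical change above is replacing the clone-and-patch approach with IRBuilder::CreateIntrinsic, which builds a fresh call to the same intrinsic ID from a plain argument list, so the shadow store is just the original store replayed on shadow values and a shadow address. Below is a minimal standalone sketch of that pattern, not MemorySanitizer code: it assumes recent LLVM headers/libraries (opaque pointers and the RetTy/ID/Args overload of CreateIntrinsic), and the function `f` and its arguments are hypothetical stand-ins for the shadow vectors and shadow pointer MSan would compute.

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("create-intrinsic-demo", Ctx);

  // void f(<4 x i32> %a, <4 x i32> %b, ptr %p): hypothetical stand-ins for
  // two shadow vectors and a shadow address.
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  auto *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), {VecTy, VecTy, PtrTy},
                                /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> IRB(BB);

  // Collect the "shadow" arguments, then emit one call to the store
  // intrinsic, identified only by its Intrinsic::ID and argument list.
  SmallVector<Value *, 8> ShadowArgs = {F->getArg(0), F->getArg(1),
                                        F->getArg(2)};
  IRB.CreateIntrinsic(IRB.getVoidTy(), Intrinsic::aarch64_neon_st2, ShadowArgs);
  IRB.CreateRetVoid();

  // Print the module; the overloaded vector/pointer types are inferred from
  // the argument types, yielding @llvm.aarch64.neon.st2.v4i32.p0.
  verifyModule(M, &errs());
  M.print(outs(), nullptr);
  return 0;
}
```

Because the overload types are deduced from the argument types, the handler can pass shadow values of exactly the same types as the original operands and get a well-typed call to the same st2/st3/st4/st1x intrinsic.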
@@ -4277,6 +4289,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       setOrigin(&I, getCleanOrigin());
       break;
 
+    case Intrinsic::aarch64_neon_st1x2:
+    case Intrinsic::aarch64_neon_st1x3:
+    case Intrinsic::aarch64_neon_st1x4:
     case Intrinsic::aarch64_neon_st2:
     case Intrinsic::aarch64_neon_st3:
     case Intrinsic::aarch64_neon_st4: {