Skip to content

Commit 9c8e597

Browse files
thurstond and github-actions[bot]
authored and committed
Automerge: [msan] Handle SSE2 cvt(t?)ps2dq/cvt(t?)pd2dq and cvtpd2ps using handleSSEVectorConvertIntrinsicByProp (#132815)
cvt(t?)ps2dq/cvt(t?)pd2dq and cvtpd2ps are currently handled strictly. This patch handles them using handleSSEVectorConvertIntrinsicByProp (from llvm/llvm-project#130705), generalized to handle SSE intrinsics that do not have a rounding mode parameter.
2 parents 5ff93c9 + 8726e97 commit 9c8e597

File tree

7 files changed

+269
-293
lines changed

7 files changed

+269
-293
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3274,22 +3274,33 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
32743274
setOriginForNaryOp(I);
32753275
}
32763276

3277-
/// Handle x86 SSE single-precision to half-precision conversion.
3277+
/// Handle x86 SSE vector conversion.
32783278
///
3279-
/// e.g.,
3279+
/// e.g., single-precision to half-precision conversion:
32803280
/// <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
32813281
/// <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
3282+
///
3283+
/// floating-point to integer:
3284+
/// <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
3285+
/// <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
3286+
///
32823287
/// Note: if the output has more elements, they are zero-initialized (and
32833288
/// therefore the shadow will also be initialized).
32843289
///
32853290
/// This differs from handleSSEVectorConvertIntrinsic() because it
32863291
/// propagates uninitialized shadow (instead of checking the shadow).
3287-
void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I) {
3288-
assert(I.arg_size() == 2);
3292+
void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I,
3293+
bool HasRoundingMode) {
3294+
if (HasRoundingMode) {
3295+
assert(I.arg_size() == 2);
3296+
[[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
3297+
assert(RoundingMode->getType()->isIntegerTy());
3298+
} else {
3299+
assert(I.arg_size() == 1);
3300+
}
3301+
32893302
Value *Src = I.getArgOperand(0);
32903303
assert(Src->getType()->isVectorTy());
3291-
[[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
3292-
assert(RoundingMode->getType()->isIntegerTy());
32933304

32943305
// The return type might have more elements than the input.
32953306
// Temporarily shrink the return type's number of elements.
@@ -3305,7 +3316,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
33053316
Value *S0 = getShadow(&I, 0);
33063317

33073318
/// For scalars:
3308-
/// Since they are converting from floating-point to integer, the output is
3319+
/// Since they are converting to and/or from floating-point, the output is:
33093320
/// - fully uninitialized if *any* bit of the input is uninitialized
33103321
/// - fully initialized if all bits of the input are initialized
33113322
/// We apply the same principle on a per-field basis for vectors.
@@ -4653,6 +4664,31 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
46534664
handleSSEVectorConvertIntrinsic(I, 2);
46544665
break;
46554666

4667+
// TODO:
4668+
// <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>)
4669+
// <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
4670+
// <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
4671+
4672+
case Intrinsic::x86_vcvtps2ph_128:
4673+
case Intrinsic::x86_vcvtps2ph_256: {
4674+
handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/true);
4675+
break;
4676+
}
4677+
4678+
case Intrinsic::x86_sse2_cvtpd2ps:
4679+
case Intrinsic::x86_sse2_cvtps2dq:
4680+
case Intrinsic::x86_sse2_cvtpd2dq:
4681+
case Intrinsic::x86_sse2_cvttps2dq:
4682+
case Intrinsic::x86_sse2_cvttpd2dq:
4683+
case Intrinsic::x86_avx_cvt_pd2_ps_256:
4684+
case Intrinsic::x86_avx_cvt_ps2dq_256:
4685+
case Intrinsic::x86_avx_cvt_pd2dq_256:
4686+
case Intrinsic::x86_avx_cvtt_ps2dq_256:
4687+
case Intrinsic::x86_avx_cvtt_pd2dq_256: {
4688+
handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false);
4689+
break;
4690+
}
4691+
46564692
case Intrinsic::x86_avx512_psll_w_512:
46574693
case Intrinsic::x86_avx512_psll_d_512:
46584694
case Intrinsic::x86_avx512_psll_q_512:
@@ -4998,12 +5034,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
49985034
break;
49995035
}
50005036

5001-
case Intrinsic::x86_vcvtps2ph_128:
5002-
case Intrinsic::x86_vcvtps2ph_256: {
5003-
handleSSEVectorConvertIntrinsicByProp(I);
5004-
break;
5005-
}
5006-
50075037
case Intrinsic::fshl:
50085038
case Intrinsic::fshr:
50095039
handleFunnelShift(I);

llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll

Lines changed: 28 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes
22
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
3+
;
4+
; Handled strictly:
5+
; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
6+
; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
7+
; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
8+
; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
9+
; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
10+
; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
11+
; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2)
12+
; - <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
13+
; - void @llvm.x86.avx.vzeroall()
14+
; - void @llvm.x86.avx.vzeroupper()
315

416
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
517
target triple = "x86_64-unknown-linux-gnu"
@@ -303,15 +315,10 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 {
303315
; CHECK-LABEL: @test_x86_avx_cvt_pd2_ps_256(
304316
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
305317
; CHECK-NEXT: call void @llvm.donothing()
306-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
307-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
308-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
309-
; CHECK: 3:
310-
; CHECK-NEXT: call void @__msan_warning_noreturn()
311-
; CHECK-NEXT: unreachable
312-
; CHECK: 4:
318+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer
319+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
313320
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]])
314-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
321+
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
315322
; CHECK-NEXT: ret <4 x float> [[RES]]
316323
;
317324
%res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
@@ -324,15 +331,10 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 {
324331
; CHECK-LABEL: @test_x86_avx_cvt_pd2dq_256(
325332
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
326333
; CHECK-NEXT: call void @llvm.donothing()
327-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
328-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
329-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
330-
; CHECK: 3:
331-
; CHECK-NEXT: call void @__msan_warning_noreturn()
332-
; CHECK-NEXT: unreachable
333-
; CHECK: 4:
334+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer
335+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
334336
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]])
335-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
337+
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
336338
; CHECK-NEXT: ret <4 x i32> [[RES]]
337339
;
338340
%res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -345,15 +347,10 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 {
345347
; CHECK-LABEL: @test_x86_avx_cvt_ps2dq_256(
346348
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
347349
; CHECK-NEXT: call void @llvm.donothing()
348-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
349-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
350-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
351-
; CHECK: 3:
352-
; CHECK-NEXT: call void @__msan_warning_noreturn()
353-
; CHECK-NEXT: unreachable
354-
; CHECK: 4:
350+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
351+
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
355352
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]])
356-
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
353+
; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
357354
; CHECK-NEXT: ret <8 x i32> [[RES]]
358355
;
359356
%res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
@@ -366,15 +363,10 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 {
366363
; CHECK-LABEL: @test_x86_avx_cvtt_pd2dq_256(
367364
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
368365
; CHECK-NEXT: call void @llvm.donothing()
369-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
370-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
371-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
372-
; CHECK: 3:
373-
; CHECK-NEXT: call void @__msan_warning_noreturn()
374-
; CHECK-NEXT: unreachable
375-
; CHECK: 4:
366+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer
367+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
376368
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]])
377-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
369+
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
378370
; CHECK-NEXT: ret <4 x i32> [[RES]]
379371
;
380372
%res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -387,15 +379,10 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 {
387379
; CHECK-LABEL: @test_x86_avx_cvtt_ps2dq_256(
388380
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
389381
; CHECK-NEXT: call void @llvm.donothing()
390-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
391-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
392-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
393-
; CHECK: 3:
394-
; CHECK-NEXT: call void @__msan_warning_noreturn()
395-
; CHECK-NEXT: unreachable
396-
; CHECK: 4:
382+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
383+
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32>
397384
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]])
398-
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
385+
; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
399386
; CHECK-NEXT: ret <8 x i32> [[RES]]
400387
;
401388
%res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
@@ -511,7 +498,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 {
511498
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
512499
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
513500
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
514-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
501+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]]
515502
; CHECK: 5:
516503
; CHECK-NEXT: call void @__msan_warning_noreturn()
517504
; CHECK-NEXT: unreachable

llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
22
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
3+
;
4+
; Handled strictly:
5+
; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2
6+
; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2
7+
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
8+
; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5
9+
; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5
10+
; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5
11+
; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5
12+
; - <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16)
13+
; - <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b)
14+
; - void @llvm.x86.mmx.emms()
15+
; - <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2)
16+
; - i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2)
317

418
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
519
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)