Skip to content

Commit 0e88d4c

Browse files
committed
[msan] Handle SSE2 cvtps2dq/cvtpd2dq using handleSSEVectorConvertIntrinsicByProp
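This generalizes handleSSEVectorConvertIntrinsicByProp from llvm#130705 to handle SSE intrinsics that do not have a rounding mode parameter. cvtps2dq/cvtpd2dq were previously handled strictly (i.e., the input shadow was checked and a warning was reported if any bit was uninitialized); their shadow is now propagated to the result instead.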
1 parent e75f586 commit 0e88d4c

File tree

3 files changed

+85
-89
lines changed

3 files changed

+85
-89
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3274,22 +3274,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
32743274
setOriginForNaryOp(I);
32753275
}
32763276

3277-
/// Handle x86 SSE single-precision to half-precision conversion.
3277+
/// Handle x86 SSE vector conversion.
32783278
///
3279-
/// e.g.,
3279+
/// e.g., single-precision to half-precision conversion:
32803280
/// <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
32813281
/// <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
3282+
///
3283+
/// floating-point to integer:
3284+
/// <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
3285+
/// <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
3286+
///
32823287
/// Note: if the output has more elements, they are zero-initialized (and
32833288
/// therefore the shadow will also be initialized).
32843289
///
32853290
/// This differs from handleSSEVectorConvertIntrinsic() because it
32863291
/// propagates uninitialized shadow (instead of checking the shadow).
3287-
void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I) {
3288-
assert(I.arg_size() == 2);
3292+
void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, bool HasRoundingMode = false) {
3293+
if (HasRoundingMode) {
3294+
assert(I.arg_size() == 2);
3295+
[[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
3296+
assert(RoundingMode->getType()->isIntegerTy());
3297+
} else {
3298+
assert(I.arg_size() == 1);
3299+
}
3300+
32893301
Value *Src = I.getArgOperand(0);
32903302
assert(Src->getType()->isVectorTy());
3291-
[[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
3292-
assert(RoundingMode->getType()->isIntegerTy());
32933303

32943304
// The return type might have more elements than the input.
32953305
// Temporarily shrink the return type's number of elements.
@@ -3305,7 +3315,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
33053315
Value *S0 = getShadow(&I, 0);
33063316

33073317
/// For scalars:
3308-
/// Since they are converting from floating-point to integer, the output is
3318+
/// Since they are converting from floating-point to integer (or
3319+
/// vice-versa), the output is
33093320
/// - fully uninitialized if *any* bit of the input is uninitialized
33103321
/// - fully initialized if all bits of the input are initialized
33113322
/// We apply the same principle on a per-field basis for vectors.
@@ -4653,6 +4664,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
46534664
handleSSEVectorConvertIntrinsic(I, 2);
46544665
break;
46554666

4667+
// TODO:
4668+
// <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>)
4669+
// <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
4670+
// <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
4671+
4672+
case Intrinsic::x86_vcvtps2ph_128:
4673+
case Intrinsic::x86_vcvtps2ph_256: {
4674+
handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ true);
4675+
break;
4676+
}
4677+
4678+
case Intrinsic::x86_sse2_cvtps2dq:
4679+
case Intrinsic::x86_sse2_cvtpd2dq: {
4680+
handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ false);
4681+
break;
4682+
}
4683+
46564684
case Intrinsic::x86_avx512_psll_w_512:
46574685
case Intrinsic::x86_avx512_psll_d_512:
46584686
case Intrinsic::x86_avx512_psll_q_512:
@@ -4998,12 +5026,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
49985026
break;
49995027
}
50005028

5001-
case Intrinsic::x86_vcvtps2ph_128:
5002-
case Intrinsic::x86_vcvtps2ph_256: {
5003-
handleSSEVectorConvertIntrinsicByProp(I);
5004-
break;
5005-
}
5006-
50075029
case Intrinsic::fshl:
50085030
case Intrinsic::fshr:
50095031
handleFunnelShift(I);

llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -160,15 +160,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
160160
; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
161161
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
162162
; CHECK-NEXT: call void @llvm.donothing()
163-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
164-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
165-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
166-
; CHECK: 3:
167-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
168-
; CHECK-NEXT: unreachable
169-
; CHECK: 4:
163+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
164+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
165+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
170166
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
171-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
167+
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
172168
; CHECK-NEXT: ret <4 x i32> [[RES]]
173169
;
174170
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -181,18 +177,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
181177
; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
182178
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
183179
; CHECK-NEXT: call void @llvm.donothing()
184-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
185-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
186-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
187-
; CHECK: 3:
188-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
189-
; CHECK-NEXT: unreachable
190-
; CHECK: 4:
180+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
181+
; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
182+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
191183
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
192-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
184+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
185+
; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
193186
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
194-
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
195-
; CHECK-NEXT: ret <2 x i64> [[BC]]
187+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
188+
; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
189+
; CHECK-NEXT: ret <2 x i64> [[BC1]]
196190
;
197191
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
198192
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -206,28 +200,26 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
206200
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
207201
; CHECK-NEXT: call void @llvm.donothing()
208202
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
209-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
203+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
210204
; CHECK: 2:
211-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
205+
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
212206
; CHECK-NEXT: unreachable
213207
; CHECK: 3:
214208
; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
215209
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
216210
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
217211
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
218212
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
219-
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
220-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
221-
; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
222-
; CHECK: 8:
223-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
224-
; CHECK-NEXT: unreachable
225-
; CHECK: 9:
213+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer
214+
; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i1> [[TMP7]] to <2 x i32>
215+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
226216
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
227-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
217+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
218+
; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
228219
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
229-
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
230-
; CHECK-NEXT: ret <2 x i64> [[BC]]
220+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
221+
; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
222+
; CHECK-NEXT: ret <2 x i64> [[BC1]]
231223
;
232224
%a0 = load <2 x double>, ptr %p0
233225
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
@@ -315,15 +307,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
315307
; CHECK-LABEL: @test_x86_sse2_cvtps2dq(
316308
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
317309
; CHECK-NEXT: call void @llvm.donothing()
318-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
319-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
320-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
321-
; CHECK: 3:
322-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
323-
; CHECK-NEXT: unreachable
324-
; CHECK: 4:
310+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
311+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
325312
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
326-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
313+
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
327314
; CHECK-NEXT: ret <4 x i32> [[RES]]
328315
;
329316
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]

llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -169,15 +169,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
169169
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
170170
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
171171
; CHECK-NEXT: call void @llvm.donothing()
172-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
173-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
174-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]]
175-
; CHECK: 4:
176-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
177-
; CHECK-NEXT: unreachable
178-
; CHECK: 5:
172+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
173+
; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
174+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
179175
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
180-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
176+
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
181177
; CHECK-NEXT: ret <4 x i32> [[RES]]
182178
;
183179
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -191,18 +187,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
191187
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
192188
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
193189
; CHECK-NEXT: call void @llvm.donothing()
194-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
195-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
196-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
197-
; CHECK: 4:
198-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
199-
; CHECK-NEXT: unreachable
200-
; CHECK: 5:
190+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
191+
; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
192+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
201193
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
202-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
194+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
195+
; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
203196
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
204-
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
205-
; CHECK-NEXT: ret <2 x i64> [[BC]]
197+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
198+
; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
199+
; CHECK-NEXT: ret <2 x i64> [[BC1]]
206200
;
207201
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
208202
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -217,28 +211,26 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
217211
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
218212
; CHECK-NEXT: call void @llvm.donothing()
219213
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
220-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
214+
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]]
221215
; CHECK: 3:
222-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
216+
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
223217
; CHECK-NEXT: unreachable
224218
; CHECK: 4:
225219
; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
226220
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
227221
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -2147483649
228222
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
229223
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
230-
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
231-
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
232-
; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
233-
; CHECK: 9:
234-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
235-
; CHECK-NEXT: unreachable
236-
; CHECK: 10:
224+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer
225+
; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i1> [[TMP8]] to <2 x i32>
226+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
237227
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
238-
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
228+
; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
229+
; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
239230
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
240-
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
241-
; CHECK-NEXT: ret <2 x i64> [[BC]]
231+
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
232+
; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
233+
; CHECK-NEXT: ret <2 x i64> [[BC1]]
242234
;
243235
%a0 = load <2 x double>, ptr %p0
244236
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
@@ -330,15 +322,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
330322
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
331323
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
332324
; CHECK-NEXT: call void @llvm.donothing()
333-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
334-
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
335-
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
336-
; CHECK: 4:
337-
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
338-
; CHECK-NEXT: unreachable
339-
; CHECK: 5:
325+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
326+
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
340327
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
341-
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
328+
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
342329
; CHECK-NEXT: ret <4 x i32> [[RES]]
343330
;
344331
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]

0 commit comments

Comments
 (0)