@@ -12,26 +12,79 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
12
12
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
13
13
; CHECK: [[FOR_BODY_PREHEADER]]:
14
14
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
15
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
16
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
17
+ ; CHECK: [[VECTOR_PH]]:
18
+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
19
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
20
+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
21
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <2 x double> poison, double [[Z]], i64 0
22
+ ; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT19]], <2 x double> poison, <2 x i32> zeroinitializer
23
+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
24
+ ; CHECK: [[VECTOR_BODY]]:
25
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
27
+ ; CHECK-NEXT: [[VEC_PHI15:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
28
+ ; CHECK-NEXT: [[VEC_PHI16:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
29
+ ; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
30
+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
31
+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 8
32
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
33
+ ; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x float>, ptr [[TMP23]], align 4
34
+ ; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
35
+ ; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD18]] to <2 x double>
36
+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP2]]
37
+ ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[BROADCAST_SPLAT]], [[TMP3]]
38
+ ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP4]], [[BROADCAST_SPLAT20]]
39
+ ; CHECK-NEXT: [[TMP7:%.*]] = fsub fast <2 x double> [[TMP5]], [[BROADCAST_SPLAT20]]
40
+ ; CHECK-NEXT: [[TMP8:%.*]] = fcmp fast ogt <2 x double> [[TMP6]], zeroinitializer
41
+ ; CHECK-NEXT: [[TMP9:%.*]] = fcmp fast ogt <2 x double> [[TMP7]], zeroinitializer
42
+ ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP6]]
43
+ ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
44
+ ; CHECK-NEXT: [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
45
+ ; CHECK-NEXT: [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
46
+ ; CHECK-NEXT: [[TMP14]] = fadd fast <2 x double> [[TMP12]], [[VEC_PHI16]]
47
+ ; CHECK-NEXT: [[TMP15]] = fadd fast <2 x double> [[TMP13]], [[VEC_PHI17]]
48
+ ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
49
+ ; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
50
+ ; CHECK-NEXT: [[TMP18]] = fadd fast <2 x double> [[TMP16]], [[VEC_PHI]]
51
+ ; CHECK-NEXT: [[TMP19]] = fadd fast <2 x double> [[TMP17]], [[VEC_PHI15]]
52
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV]], 4
53
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
54
+ ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
55
+ ; CHECK: [[MIDDLE_BLOCK]]:
56
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP19]], [[TMP18]]
57
+ ; CHECK-NEXT: [[TMP21:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
58
+ ; CHECK-NEXT: [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP15]], [[TMP14]]
59
+ ; CHECK-NEXT: [[TMP22:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]])
60
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
61
+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
62
+ ; CHECK: [[FOR_BODY_PREHEADER23]]:
63
+ ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
64
+ ; CHECK-NEXT: [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
65
+ ; CHECK-NEXT: [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
15
66
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
16
67
; CHECK: [[FOR_BODY]]:
17
- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
18
- ; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
19
- ; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
20
- ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
21
- ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
68
+ ; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
69
+ ; CHECK-NEXT: [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
70
+ ; CHECK-NEXT: [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
71
+ ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
72
+ ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
22
73
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
23
74
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
24
75
; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
25
76
; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
26
- ; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
27
77
; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
28
- ; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
29
- ; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
30
- ; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
31
- ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
78
+ ; CHECK-NEXT: [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
79
+ ; CHECK-NEXT: [[V0_2]] = fadd fast double [[ADD8]], [[V0_011]]
80
+ ; CHECK-NEXT: [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
81
+ ; CHECK-NEXT: [[V1_2]] = fadd fast double [[ADD4]], [[V1_012]]
82
+ ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
32
83
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
33
- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
84
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
34
85
; CHECK: [[FOR_END_LOOPEXIT]]:
86
+ ; CHECK-NEXT: [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
87
+ ; CHECK-NEXT: [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
35
88
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
36
89
; CHECK-NEXT: br label %[[FOR_END]]
37
90
; CHECK: [[FOR_END]]:
@@ -292,3 +345,9 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
292
345
declare void @resample (i32 noundef, ptr noundef)
293
346
declare double @llvm.exp2.f64 (double )
294
347
declare void @llvm.lifetime.end.p0 (i64 immarg, ptr nocapture )
348
+ ;.
349
+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
350
+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
351
+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
352
+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
353
+ ;.
0 commit comments