@@ -828,10 +828,10 @@ define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) {
828
828
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0
829
829
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <16 x i32> zeroinitializer
830
830
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8
831
- ; CHECK-NEXT: [[TMP3:%.*]] = fmul <16 x double> [[WIDE_VEC]], [[TMP2]]
831
+ ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <16 x double> [[WIDE_VEC]], [[TMP2]]
832
832
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]]
833
833
; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP4]], align 8
834
- ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd <16 x double> [[WIDE_VEC34]], [[TMP3]]
834
+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x double> [[WIDE_VEC34]], [[TMP3]]
835
835
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP0]], 7
836
836
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP5]]
837
837
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 -56
@@ -937,5 +937,48 @@ define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
937
937
ret <4 x float > %extshuf
938
938
}
939
939
940
+ ; Four stride-4 lanes are computed as lane(%addr1) + splat(%val) * lane(%addr2),
+ ; each lane with different fast-math flags, then re-interleaved and stored.
+ ; The expected output below is a single <16 x float> fmul/fadd pair carrying only
+ ; the flags present on every lane ('fast' implies all flags): 'contract' on the
+ ; fmul and 'reassoc contract' on the fadd.
+ define void @maximal_legal_fpmath (ptr %addr1 , ptr %addr2 , ptr %result , float %val ) {
941
+ ; CHECK-LABEL: define void @maximal_legal_fpmath(
942
+ ; CHECK-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], ptr [[RESULT:%.*]], float [[VAL:%.*]]) {
943
+ ; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
944
+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[SPLATINSERT]], <4 x float> poison, <16 x i32> zeroinitializer
945
+ ; CHECK-NEXT: [[VEC1:%.*]] = load <16 x float>, ptr [[ADDR1]], align 4
946
+ ; CHECK-NEXT: [[VEC2:%.*]] = load <16 x float>, ptr [[ADDR2]], align 4
947
+ ; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <16 x float> [[TMP1]], [[VEC2]]
948
+ ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd reassoc contract <16 x float> [[VEC1]], [[TMP2]]
949
+ ; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[RESULT]], align 4
950
+ ; CHECK-NEXT: ret void
951
+ ;
952
+ ; Splat %val into all four elements of a <4 x float>.
+ %splatinsert = insertelement <4 x float > poison, float %val , i64 0
953
+ %incoming.vec = shufflevector <4 x float > %splatinsert , <4 x float > poison, <4 x i32 > zeroinitializer
954
+
955
+ ; Load 16 floats from %addr1 and split them into four stride-4 lanes
+ ; (lane i holds elements i, i+4, i+8, i+12).
+ %vec1 = load <16 x float >, ptr %addr1 , align 4
956
+ %strided.vec1 = shufflevector <16 x float > %vec1 , <16 x float > poison, <4 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 >
957
+ %strided.vec2 = shufflevector <16 x float > %vec1 , <16 x float > poison, <4 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 >
958
+ %strided.vec3 = shufflevector <16 x float > %vec1 , <16 x float > poison, <4 x i32 > <i32 2 , i32 6 , i32 10 , i32 14 >
959
+ %strided.vec4 = shufflevector <16 x float > %vec1 , <16 x float > poison, <4 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 >
960
+
961
+ ; Same stride-4 split for the 16 floats loaded from %addr2.
+ %vec2 = load <16 x float >, ptr %addr2 , align 4
962
+ %strided.vec6 = shufflevector <16 x float > %vec2 , <16 x float > poison, <4 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 >
963
+ %strided.vec7 = shufflevector <16 x float > %vec2 , <16 x float > poison, <4 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 >
964
+ %strided.vec8 = shufflevector <16 x float > %vec2 , <16 x float > poison, <4 x i32 > <i32 2 , i32 6 , i32 10 , i32 14 >
965
+ %strided.vec9 = shufflevector <16 x float > %vec2 , <16 x float > poison, <4 x i32 > <i32 3 , i32 7 , i32 11 , i32 15 >
966
+
967
+ ; Per-lane multiply-add with deliberately mismatched flags:
+ ;   fmul: fast, contract, contract reassoc, contract reassoc
+ ;   fadd: fast, fast, fast, contract reassoc
+ %1 = fmul fast <4 x float > %incoming.vec , %strided.vec6
968
+ %2 = fadd fast <4 x float > %strided.vec1 , %1
969
+ %3 = fmul contract <4 x float > %incoming.vec , %strided.vec7
970
+ %4 = fadd fast <4 x float > %strided.vec2 , %3
971
+ %5 = fmul contract reassoc <4 x float > %incoming.vec , %strided.vec8
972
+ %6 = fadd fast <4 x float > %strided.vec3 , %5
973
+ %7 = fmul contract reassoc <4 x float > %incoming.vec , %strided.vec9
974
+ %8 = fadd contract reassoc <4 x float > %strided.vec4 , %7
975
+
976
+ ; Concatenate the lane results pairwise, then interleave them back into the
+ ; element order of the loaded vectors (output element 4k+j = lane j, element k).
+ %9 = shufflevector <4 x float > %2 , <4 x float > %4 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
977
+ %10 = shufflevector <4 x float > %6 , <4 x float > %8 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
978
+ %interleaved.vec = shufflevector <8 x float > %9 , <8 x float > %10 , <16 x i32 > <i32 0 , i32 4 , i32 8 , i32 12 , i32 1 , i32 5 , i32 9 , i32 13 , i32 2 , i32 6 , i32 10 , i32 14 , i32 3 , i32 7 , i32 11 , i32 15 >
979
+ store <16 x float > %interleaved.vec , ptr %result , align 4
980
+
981
+ ret void
982
+ }
940
983
941
984
declare void @use (<4 x i8 >)
0 commit comments