@@ -72,3 +72,47 @@ define <8 x i16> @combine_pmaddubsw_zero_commute(<16 x i8> %a0, <16 x i8> %a1) {
72
72
ret <8 x i16 > %1
73
73
}
74
74
75
; Constant-fold check: pmaddubsw treats the first operand as unsigned bytes
; and the second as signed bytes. Element 3 pairs bytes 6/7 of each operand.
define i32 @combine_pmaddubsw_constant() {
; SSE-LABEL: combine_pmaddubsw_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
; SSE-NEXT:    pextrw $3, %xmm0, %eax
; SSE-NEXT:    cwtl
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pmaddubsw_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
; AVX-NEXT:    vpextrw $3, %xmm0, %eax
; AVX-NEXT:    cwtl
; AVX-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
  %2 = extractelement <8 x i16> %1, i32 3 ; ((uint8_t)-6 * 7) + (7 * -8) = (250 * 7) + (7 * -8) = 1694
  %3 = sext i16 %2 to i32
  ret i32 %3
}
96
+
97
; Saturation check: pmaddubsw saturates the signed 16-bit pairwise sum.
; Element 0 pairs bytes 0/1: 255*-128 + 255*-128 = -65280, which saturates
; to i16 minimum (-32768).
define i32 @combine_pmaddubsw_constant_sat() {
; SSE-LABEL: combine_pmaddubsw_constant_sat:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
; SSE-NEXT:    pmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    cwtl
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pmaddubsw_constant_sat:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [255,255,2,3,4,5,250,7,8,9,10,11,12,13,14,15]
; AVX-NEXT:    vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,3,4,5,6,7,248,9,10,11,12,13,14,15,16]
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    cwtl
; AVX-NEXT:    retq
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> <i8 -1, i8 -1, i8 2, i8 3, i8 4, i8 5, i8 -6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> <i8 -128, i8 -128, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>)
  %2 = extractelement <8 x i16> %1, i32 0 ; add_sat_i16((uint8_t)-1 * -128, (uint8_t)-1 * -128) = add_sat_i16(255 * -128, 255 * -128) = sat_i16(-65280) = -32768
  %3 = sext i16 %2 to i32
  ret i32 %3
}
118
+
0 commit comments