Skip to content

Commit 381efa4

Browse files
committed
Revert rG67275263b3b781a "[X86] X86DAGToDAGISel - attempt to merge XMM/YMM loads with YMM/ZMM loads of the same ptr (llvm#73126)"
Missed an issue where we were calling continue from within the for loop - a fixed version is incoming shortly.
1 parent cf1e0c0 commit 381efa4

37 files changed

+6858
-6886
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,43 +1036,6 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
10361036

10371037
break;
10381038
}
1039-
case ISD::LOAD: {
1040-
// If this is a XMM/YMM load of the same lower bits as another YMM/ZMM
1041-
// load, then just extract the lower subvector and avoid the second load.
1042-
auto *Ld = cast<LoadSDNode>(N);
1043-
MVT VT = N->getSimpleValueType(0);
1044-
if (!ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
1045-
!(VT.is128BitVector() || VT.is256BitVector()))
1046-
break;
1047-
1048-
SDValue Ptr = Ld->getBasePtr();
1049-
SDValue Chain = Ld->getChain();
1050-
for (SDNode *User : Ptr->uses()) {
1051-
auto *UserLd = dyn_cast<LoadSDNode>(N);
1052-
MVT UserVT = User->getSimpleValueType(0);
1053-
if (User != N && UserLd && ISD::isNormalLoad(User) &&
1054-
UserLd->getBasePtr() == Ptr && UserLd->getChain() == Chain &&
1055-
!User->hasAnyUseOfValue(1) &&
1056-
(UserVT.is256BitVector() || UserVT.is512BitVector()) &&
1057-
UserVT.getSizeInBits() > VT.getSizeInBits()) {
1058-
SDLoc dl(N);
1059-
unsigned NumSubElts =
1060-
VT.getSizeInBits() / UserVT.getScalarSizeInBits();
1061-
MVT SubVT = MVT::getVectorVT(UserVT.getScalarType(), NumSubElts);
1062-
SDValue Extract = CurDAG->getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT,
1063-
SDValue(User, 0),
1064-
CurDAG->getIntPtrConstant(0, dl));
1065-
SDValue Res = CurDAG->getBitcast(VT, Extract);
1066-
--I;
1067-
SDValue To[] = {Res, SDValue(UserLd, 1)};
1068-
CurDAG->ReplaceAllUsesWith(N, To);
1069-
++I;
1070-
MadeChange = true;
1071-
continue;
1072-
}
1073-
}
1074-
break;
1075-
}
10761039
case ISD::VSELECT: {
10771040
// Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
10781041
EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();

llvm/test/CodeGen/X86/avx512-regcall-Mask.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,10 @@ define dso_local i64 @caller_argv64i1() #0 {
9898
; X32: # %bb.0: # %entry
9999
; X32-NEXT: pushl %edi
100100
; X32-NEXT: subl $88, %esp
101-
; X32-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
101+
; X32-NEXT: vmovddup {{.*#+}} xmm0 = [2,1,2,1]
102+
; X32-NEXT: # xmm0 = mem[0,0]
102103
; X32-NEXT: vmovups %xmm0, {{[0-9]+}}(%esp)
104+
; X32-NEXT: vbroadcastsd {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
103105
; X32-NEXT: vmovups %zmm0, (%esp)
104106
; X32-NEXT: movl $1, {{[0-9]+}}(%esp)
105107
; X32-NEXT: movl $2, {{[0-9]+}}(%esp)

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,9 +1401,9 @@ define <32 x bfloat> @pr63017_2() nounwind {
14011401
; AVXNC-NEXT: jne .LBB12_2
14021402
; AVXNC-NEXT: # %bb.1: # %cond.load
14031403
; AVXNC-NEXT: vpbroadcastw {{.*#+}} ymm1 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
1404-
; AVXNC-NEXT: vpbroadcastw {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
1405-
; AVXNC-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm2
1406-
; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
1404+
; AVXNC-NEXT: vpbroadcastw {{.*#+}} xmm0 = [49024,49024,49024,49024,49024,49024,49024,49024]
1405+
; AVXNC-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0
1406+
; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
14071407
; AVXNC-NEXT: .LBB12_2: # %else
14081408
; AVXNC-NEXT: xorl %eax, %eax
14091409
; AVXNC-NEXT: testb %al, %al

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
209209
; AVX1-NEXT: vmovd %edi, %xmm0
210210
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
211211
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
212-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
213-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
214-
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
212+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
213+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
215214
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
216215
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
217216
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -256,9 +255,8 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
256255
; AVX1-NEXT: vmovd %edi, %xmm0
257256
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
258257
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
259-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
260-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
261-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
258+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
259+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
262260
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
263261
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
264262
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -305,9 +303,8 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
305303
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
306304
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
307305
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
308-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
309-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
310-
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
306+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
307+
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
311308
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
312309
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
313310
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -424,15 +421,13 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
424421
; AVX1-NEXT: vmovd %edi, %xmm0
425422
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
426423
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
427-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
428-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
429-
; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
430-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
431-
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
432-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
433-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [16,32,64,128]
434-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
435-
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm2
424+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
425+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
426+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
427+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
428+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
429+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
430+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
436431
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
437432
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
438433
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
@@ -487,15 +482,13 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
487482
; AVX1-NEXT: vmovd %edi, %xmm0
488483
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
489484
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
490-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
491-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
492-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
493-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
494-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
495-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
496-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
497-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
498-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
485+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
486+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
487+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
488+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
489+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
490+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
491+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
499492
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
500493
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
501494
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
@@ -556,16 +549,17 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
556549
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
557550
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
558551
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
559-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
552+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
553+
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
560554
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
561555
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
562556
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
563557
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
564558
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
565-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
566-
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
567-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
568-
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
559+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
560+
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
561+
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
562+
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
569563
; AVX1-NEXT: retq
570564
;
571565
; AVX2-LABEL: ext_i32_32i16:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
265265
; AVX1-NEXT: vmovd %edi, %xmm0
266266
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
267267
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
268-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
269-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
270-
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
268+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
269+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
271270
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
272271
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
273272
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -325,9 +324,8 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
325324
; AVX1-NEXT: vmovd %edi, %xmm0
326325
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
327326
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
328-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
329-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
330-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
327+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
328+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
331329
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
332330
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
333331
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -387,9 +385,8 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
387385
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
388386
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
389387
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
390-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
391-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
392-
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
388+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
389+
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
393390
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
394391
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
395392
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -547,17 +544,15 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
547544
; AVX1-NEXT: vmovd %edi, %xmm0
548545
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
549546
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
550-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
551-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
552-
; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
553-
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
554-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
555-
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
547+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
548+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
556549
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
557-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
558-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [16,32,64,128]
559-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
560-
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm2
550+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
551+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
552+
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
553+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
554+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
555+
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
561556
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
562557
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
563558
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -628,17 +623,15 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
628623
; AVX1-NEXT: vmovd %edi, %xmm0
629624
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
630625
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
631-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
632-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
633-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
634-
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
635-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
636-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
626+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
627+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
637628
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
638-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
639-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
640-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
641-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
629+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
630+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
631+
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
632+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
633+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
634+
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
642635
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
643636
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
644637
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
@@ -712,21 +705,22 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
712705
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
713706
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
714707
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
715-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm3
716-
; AVX1-NEXT: vpsrlw $15, %xmm3, %xmm3
708+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
709+
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
710+
; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
717711
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
718-
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
719-
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
712+
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
713+
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
720714
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
721-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
715+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
722716
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
723717
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
724718
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
725719
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
726-
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
720+
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
727721
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
728722
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
729-
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
723+
; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
730724
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
731725
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
732726
; AVX1-NEXT: retq

0 commit comments

Comments
 (0)