Skip to content

Commit bc9823c

Browse files
committed
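[X86][BF16] Change MVT to EVT in combineFP_EXTEND
The combine previously called getSimpleVT() on the vector types before changing their element type, which is only valid when the type is a simple MVT; odd-sized vectors such as <7 x bfloat> may not be. Using EVT::changeVectorElementType directly handles such types as well.
Fixes: llvm#92471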
1 parent 9bffe79 commit bc9823c

File tree

2 files changed: 55 additions (+55) and 2 deletions (-2).

2 files changed

+55
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56802,12 +56802,12 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
5680256802

5680356803
assert(!IsStrict && "Strict FP doesn't support BF16");
5680456804
if (VT.getVectorElementType() == MVT::f64) {
56805-
MVT TmpVT = VT.getSimpleVT().changeVectorElementType(MVT::f32);
56805+
EVT TmpVT = VT.changeVectorElementType(MVT::f32);
5680656806
return DAG.getNode(ISD::FP_EXTEND, dl, VT,
5680756807
DAG.getNode(ISD::FP_EXTEND, dl, TmpVT, Src));
5680856808
}
5680956809
assert(VT.getVectorElementType() == MVT::f32 && "Unexpected fpext");
56810-
MVT NVT = SrcVT.getSimpleVT().changeVectorElementType(MVT::i32);
56810+
EVT NVT = SrcVT.changeVectorElementType(MVT::i32);
5681156811
Src = DAG.getBitcast(SrcVT.changeTypeToInteger(), Src);
5681256812
Src = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Src);
5681356813
Src = DAG.getNode(ISD::SHL, dl, NVT, Src, DAG.getConstant(16, dl, NVT));

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,3 +2464,56 @@ define float @trunc_ext(float %a) nounwind {
24642464
%c = fpext bfloat %b to float
24652465
ret float %c
24662466
}
2467+
2468+
define void @PR92471(ptr %0, ptr %1) nounwind {
2469+
; X86-LABEL: PR92471:
2470+
; X86: # %bb.0:
2471+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
2472+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
2473+
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2474+
; X86-NEXT: vpinsrd $1, 4(%ecx), %xmm0, %xmm0
2475+
; X86-NEXT: vpinsrd $2, 8(%ecx), %xmm0, %xmm0
2476+
; X86-NEXT: vpinsrw $6, 12(%ecx), %xmm0, %xmm0
2477+
; X86-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2478+
; X86-NEXT: vpslld $16, %ymm0, %ymm0
2479+
; X86-NEXT: vextracti128 $1, %ymm0, %xmm1
2480+
; X86-NEXT: vpextrd $2, %xmm1, 24(%eax)
2481+
; X86-NEXT: vpextrd $1, %xmm1, 20(%eax)
2482+
; X86-NEXT: vmovd %xmm1, 16(%eax)
2483+
; X86-NEXT: vmovdqu %xmm0, (%eax)
2484+
; X86-NEXT: vzeroupper
2485+
; X86-NEXT: retl
2486+
;
2487+
; SSE2-LABEL: PR92471:
2488+
; SSE2: # %bb.0:
2489+
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
2490+
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
2491+
; SSE2-NEXT: pinsrw $2, 12(%rdi), %xmm1
2492+
; SSE2-NEXT: pxor %xmm2, %xmm2
2493+
; SSE2-NEXT: pxor %xmm3, %xmm3
2494+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
2495+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
2496+
; SSE2-NEXT: movdqu %xmm2, (%rsi)
2497+
; SSE2-NEXT: movq %xmm3, 16(%rsi)
2498+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
2499+
; SSE2-NEXT: movd %xmm0, 24(%rsi)
2500+
; SSE2-NEXT: retq
2501+
;
2502+
; AVX-LABEL: PR92471:
2503+
; AVX: # %bb.0:
2504+
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2505+
; AVX-NEXT: vpinsrd $2, 8(%rdi), %xmm0, %xmm0
2506+
; AVX-NEXT: vpinsrw $6, 12(%rdi), %xmm0, %xmm0
2507+
; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2508+
; AVX-NEXT: vpslld $16, %ymm0, %ymm0
2509+
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
2510+
; AVX-NEXT: vpextrd $2, %xmm1, 24(%rsi)
2511+
; AVX-NEXT: vmovq %xmm1, 16(%rsi)
2512+
; AVX-NEXT: vmovdqu %xmm0, (%rsi)
2513+
; AVX-NEXT: vzeroupper
2514+
; AVX-NEXT: retq
2515+
%3 = load <7 x bfloat>, ptr %0, align 2
2516+
%4 = fpext <7 x bfloat> %3 to <7 x float>
2517+
store <7 x float> %4, ptr %1, align 4
2518+
ret void
2519+
}

0 commit comments

Comments
 (0)