Skip to content

Commit b72f144

Browse files
committed
AMDGPU/GlobalISel: Better code for one case of G_SHUFFLE_VECTOR on v2i16
Reviewers: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D74987
1 parent e9c79a7 commit b72f144

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2191,7 +2191,17 @@ bool AMDGPUInstructionSelector::selectG_SHUFFLE_VECTOR(
21912191
.addReg(SrcVec)
21922192
.addImm(16);
21932193
}
2194-
} else if (isZeroOrUndef(Mask[0]) && Mask[1] == 0) {
2194+
} else if (Mask[0] == -1 && Mask[1] == 0) {
2195+
if (IsVALU) {
2196+
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), DstReg)
2197+
.addImm(16)
2198+
.addReg(SrcVec);
2199+
} else {
2200+
BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHL_B32), DstReg)
2201+
.addReg(SrcVec)
2202+
.addImm(16);
2203+
}
2204+
} else if (Mask[0] == 0 && Mask[1] == 0) {
21952205
if (IsVALU) {
21962206
// Write low half of the register into the high half.
21972207
MachineInstr *MovSDWA =

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shuffle-vector.v2s16.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ body: |
5656
; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_0
5757
; GFX9: liveins: $vgpr0, $vgpr1
5858
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
59-
; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
60-
; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
59+
; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
60+
; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
6161
%0:vgpr(<2 x s16>) = COPY $vgpr0
6262
%1:vgpr(<2 x s16>) = COPY $vgpr1
6363
%2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -143,8 +143,8 @@ body: |
143143
; GFX9-LABEL: name: v_shufflevector_v2s16_v2s16_u_2
144144
; GFX9: liveins: $vgpr0, $vgpr1
145145
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
146-
; GFX9: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[COPY]], 0, 5, 2, 4, implicit $exec, implicit [[COPY]](tied-def 0)
147-
; GFX9: $vgpr0 = COPY [[V_MOV_B32_sdwa]]
146+
; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY]], implicit $exec
147+
; GFX9: $vgpr0 = COPY [[V_LSHLREV_B32_e64_]]
148148
%0:vgpr(<2 x s16>) = COPY $vgpr0
149149
%1:vgpr(<2 x s16>) = COPY $vgpr1
150150
%2:vgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)
@@ -424,8 +424,8 @@ body: |
424424
; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_0
425425
; GFX9: liveins: $sgpr0, $sgpr1
426426
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
427-
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
428-
; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
427+
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
428+
; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
429429
%0:sgpr(<2 x s16>) = COPY $sgpr0
430430
%1:sgpr(<2 x s16>) = COPY $sgpr1
431431
%2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 0)
@@ -511,8 +511,8 @@ body: |
511511
; GFX9-LABEL: name: s_shufflevector_v2s16_v2s16_u_2
512512
; GFX9: liveins: $sgpr0, $sgpr1
513513
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr1
514-
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[COPY]]
515-
; GFX9: $sgpr0 = COPY [[S_PACK_LL_B32_B16_]]
514+
; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
515+
; GFX9: $sgpr0 = COPY [[S_LSHL_B32_]]
516516
%0:sgpr(<2 x s16>) = COPY $sgpr0
517517
%1:sgpr(<2 x s16>) = COPY $sgpr1
518518
%2:sgpr(<2 x s16>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(undef, 2)

0 commit comments

Comments
 (0)