Skip to content

Commit ab1dcac

Browse files
authored
[AMDGPU][RegBankInfo] Promote scalar i16 and/or/xor to i32 (#131306)
See #64591
1 parent 2709998 commit ab1dcac

File tree

5 files changed

+401
-450
lines changed

5 files changed

+401
-450
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2416,9 +2416,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
24162416
Register DstReg = MI.getOperand(0).getReg();
24172417
LLT DstTy = MRI.getType(DstReg);
24182418

2419-
if (DstTy.getSizeInBits() == 1) {
2420-
const RegisterBank *DstBank =
2419+
const RegisterBank *DstBank =
24212420
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2421+
2422+
if (DstTy.getSizeInBits() == 1) {
24222423
if (DstBank == &AMDGPU::VCCRegBank)
24232424
break;
24242425

@@ -2432,6 +2433,27 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
24322433
return;
24332434
}
24342435

2436+
if (DstTy.getSizeInBits() == 16 && DstBank == &AMDGPU::SGPRRegBank) {
2437+
const LLT S32 = LLT::scalar(32);
2438+
MachineBasicBlock *MBB = MI.getParent();
2439+
MachineFunction *MF = MBB->getParent();
2440+
ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);
2441+
LegalizerHelper Helper(*MF, ApplySALU, B);
2442+
// Widen to S32, but handle `G_XOR x, -1` differently. Legalizer widening
2443+
// will use a G_ANYEXT to extend the -1 which prevents matching G_XOR -1
2444+
// as "not".
2445+
if (MI.getOpcode() == AMDGPU::G_XOR &&
2446+
mi_match(MI.getOperand(2).getReg(), MRI, m_SpecificICstOrSplat(-1))) {
2447+
Helper.widenScalarSrc(MI, S32, 1, AMDGPU::G_ANYEXT);
2448+
Helper.widenScalarSrc(MI, S32, 2, AMDGPU::G_SEXT);
2449+
Helper.widenScalarDst(MI, S32);
2450+
} else {
2451+
if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized)
2452+
llvm_unreachable("widen scalar should have succeeded");
2453+
}
2454+
return;
2455+
}
2456+
24352457
if (DstTy.getSizeInBits() != 64)
24362458
break;
24372459

llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -391,20 +391,20 @@ define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) {
391391
define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) {
392392
; GCN-LABEL: s_andn2_i16_multi_use:
393393
; GCN: ; %bb.0:
394-
; GCN-NEXT: s_xor_b32 s1, s3, -1
394+
; GCN-NEXT: s_not_b32 s1, s3
395395
; GCN-NEXT: s_andn2_b32 s0, s2, s3
396396
; GCN-NEXT: ; return to shader part epilog
397397
;
398398
; GFX10-LABEL: s_andn2_i16_multi_use:
399399
; GFX10: ; %bb.0:
400400
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
401-
; GFX10-NEXT: s_xor_b32 s1, s3, -1
401+
; GFX10-NEXT: s_not_b32 s1, s3
402402
; GFX10-NEXT: ; return to shader part epilog
403403
;
404404
; GFX11-LABEL: s_andn2_i16_multi_use:
405405
; GFX11: ; %bb.0:
406406
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
407-
; GFX11-NEXT: s_xor_b32 s1, s3, -1
407+
; GFX11-NEXT: s_not_b32 s1, s3
408408
; GFX11-NEXT: ; return to shader part epilog
409409
%not.src1 = xor i16 %src1, -1
410410
%and = and i16 %src0, %not.src1
@@ -482,14 +482,14 @@ define amdgpu_ps float @v_andn2_i16_sv(i16 inreg %src0, i16 %src1) {
482482
define amdgpu_ps float @v_andn2_i16_vs(i16 %src0, i16 inreg %src1) {
483483
; GCN-LABEL: v_andn2_i16_vs:
484484
; GCN: ; %bb.0:
485-
; GCN-NEXT: s_xor_b32 s0, s2, -1
485+
; GCN-NEXT: s_not_b32 s0, s2
486486
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
487487
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
488488
; GCN-NEXT: ; return to shader part epilog
489489
;
490490
; GFX10PLUS-LABEL: v_andn2_i16_vs:
491491
; GFX10PLUS: ; %bb.0:
492-
; GFX10PLUS-NEXT: s_xor_b32 s0, s2, -1
492+
; GFX10PLUS-NEXT: s_not_b32 s0, s2
493493
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
494494
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
495495
; GFX10PLUS-NEXT: ; return to shader part epilog

0 commit comments

Comments
 (0)