Skip to content

[SPARC] Use lzcnt to implement CTLZ when we have VIS3 #135715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
19 changes: 17 additions & 2 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
Expand Down Expand Up @@ -1753,7 +1754,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ, MVT::i64,
Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
Expand Down Expand Up @@ -1815,7 +1817,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32,
Subtarget->isVIS3() ? Promote : Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
Expand Down Expand Up @@ -1986,6 +1989,18 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (Subtarget->hasLeonCycleCounter())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

// Pick the legalization action for CTLZ_ZERO_UNDEF (count leading zeros where
// a zero input need not be handled) based on the subtarget features queried
// below: VIS3 first, then popc, then a library call as the fallback.
if (Subtarget->isVIS3()) {
// With VIS3 the i64 node is Legal (selected by the VIS3 LZCNT pattern) and
// the i32 node is promoted.
// NOTE(review): unlike the fallback branch below, this Promote is not guarded
// by is64Bit() — confirm VIS3 cannot be enabled on a subtarget where i64 is
// not a legal type.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
} else if (Subtarget->usePopc()) {
// Without VIS3 but with popc, let the generic legalizer expand both widths.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
// Neither feature is available: use a library call. On 64-bit subtargets the
// i32 node is promoted first instead of getting its own libcall.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32,
Subtarget->is64Bit() ? Promote : LibCall);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, LibCall);
}

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

setMinFunctionAlignment(Align(4));
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrVIS.td
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,7 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
// VIS3 instruction patterns.
// Selection patterns that are only active when the HasVIS3 predicate holds.
let Predicates = [HasVIS3] in {
// i64 adde is selected to ADDXCCC.
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;

// Both i64 count-leading-zeros nodes (ctlz and ctlz_zero_undef) are selected
// to the same LZCNT instruction.
def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
} // Predicates = [HasVIS3]
183 changes: 183 additions & 0 deletions llvm/test/CodeGen/SPARC/ctlz.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3

define i32 @i32_nopoison(i32 %x) nounwind {
; V9-LABEL: i32_nopoison:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -176, %sp
; V9-NEXT: cmp %i0, 0
; V9-NEXT: be %icc, .LBB0_2
; V9-NEXT: nop
; V9-NEXT: ! %bb.1: ! %cond.false
; V9-NEXT: call __clzdi2
; V9-NEXT: sllx %i0, 32, %o0
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
; V9-NEXT: .LBB0_2:
; V9-NEXT: ret
; V9-NEXT: restore %g0, 32, %o0
;
; POPC-LABEL: i32_nopoison:
; POPC: ! %bb.0:
; POPC-NEXT: cmp %o0, 0
; POPC-NEXT: be %icc, .LBB0_2
; POPC-NEXT: nop
; POPC-NEXT: ! %bb.1: ! %cond.false
; POPC-NEXT: srl %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: srl %o0, 0, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
; POPC-NEXT: .LBB0_2:
; POPC-NEXT: retl
; POPC-NEXT: mov 32, %o0
;
; VIS3-LABEL: i32_nopoison:
; VIS3: ! %bb.0:
; VIS3-NEXT: cmp %o0, 0
; VIS3-NEXT: be %icc, .LBB0_2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The branch is redundant, but I guess there is nothing we can do with it right now. It must've been created too early.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those turn out to be controlled by the codegen parameters you mentioned above, it seems.

; VIS3-NEXT: nop
; VIS3-NEXT: ! %bb.1: ! %cond.false
; VIS3-NEXT: sllx %o0, 32, %o0
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
; VIS3-NEXT: .LBB0_2:
; VIS3-NEXT: retl
; VIS3-NEXT: mov 32, %o0
%ret = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
ret i32 %ret
}

define i32 @i32_poison(i32 %x) nounwind {
; V9-LABEL: i32_poison:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -176, %sp
; V9-NEXT: call __clzdi2
; V9-NEXT: sllx %i0, 32, %o0
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder why gcc produced another shift and an add...

;
; POPC-LABEL: i32_poison:
; POPC: ! %bb.0:
; POPC-NEXT: srl %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: srl %o0, 0, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
;
; VIS3-LABEL: i32_poison:
; VIS3: ! %bb.0:
; VIS3-NEXT: sllx %o0, 32, %o0
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
%ret = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
ret i32 %ret
}

define i64 @i64_nopoison(i64 %x) nounwind {
; V9-LABEL: i64_nopoison:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -176, %sp
; V9-NEXT: brz %i0, .LBB2_2
; V9-NEXT: nop
; V9-NEXT: ! %bb.1: ! %cond.false
; V9-NEXT: call __clzdi2
; V9-NEXT: mov %i0, %o0
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
; V9-NEXT: .LBB2_2:
; V9-NEXT: ret
; V9-NEXT: restore %g0, 64, %o0
;
; POPC-LABEL: i64_nopoison:
; POPC: ! %bb.0:
; POPC-NEXT: brz %o0, .LBB2_2
; POPC-NEXT: nop
; POPC-NEXT: ! %bb.1: ! %cond.false
; POPC-NEXT: srlx %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 32, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
; POPC-NEXT: .LBB2_2:
; POPC-NEXT: retl
; POPC-NEXT: mov 64, %o0
;
; VIS3-LABEL: i64_nopoison:
; VIS3: ! %bb.0:
; VIS3-NEXT: brz %o0, .LBB2_2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is odd; I'd expect no branches here.

; VIS3-NEXT: nop
; VIS3-NEXT: ! %bb.1: ! %cond.false
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
; VIS3-NEXT: .LBB2_2:
; VIS3-NEXT: retl
; VIS3-NEXT: mov 64, %o0
%ret = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
ret i64 %ret
}

; 64-bit ctlz called with the second intrinsic argument set to true. The
; expected output has no zero-check branch under any of the three prefixes:
; V9 calls __clzdi2, POPC uses an or/shift sequence followed by popc, and
; VIS3 emits a single lzcnt.
define i64 @i64_poison(i64 %x) nounwind {
; V9-LABEL: i64_poison:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -176, %sp
; V9-NEXT: call __clzdi2
; V9-NEXT: mov %i0, %o0
; V9-NEXT: ret
; V9-NEXT: restore %g0, %o0, %o0
;
; POPC-LABEL: i64_poison:
; POPC: ! %bb.0:
; POPC-NEXT: srlx %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 32, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
;
; VIS3-LABEL: i64_poison:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
%ret = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
ret i64 %ret
}
Loading