Skip to content

[SPARC] Use lzcnt to implement CTLZ when we have VIS3 #135715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
12 changes: 10 additions & 2 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
Expand Down Expand Up @@ -1753,7 +1754,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
// With VIS3, 64-bit CTLZ is selected directly to LZCNT. Without it there is
// no native instruction, so keep the generic inline expansion (Expand) rather
// than requesting a runtime library call.
setOperationAction(ISD::CTLZ, MVT::i64,
Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
Expand Down Expand Up @@ -1815,7 +1817,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
// With VIS3, 32-bit CTLZ is promoted so that the 64-bit LZCNT can be used.
// Without it there is no native instruction, so keep the generic inline
// expansion (Expand) rather than requesting a runtime library call.
setOperationAction(ISD::CTLZ, MVT::i32,
Subtarget->isVIS3() ? Promote : Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
Expand Down Expand Up @@ -1986,6 +1989,11 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (Subtarget->hasLeonCycleCounter())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

// On VIS3 subtargets the zero-undef variant of CTLZ gets explicit actions:
// the i64 form is marked Legal and the i32 form is marked Promote.
if (Subtarget->isVIS3()) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
}

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

setMinFunctionAlignment(Align(4));
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/Sparc/SparcInstrVIS.td
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,7 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
// VIS3 instruction patterns.
let Predicates = [HasVIS3] in {
// The i64 adde node selects ADDXCCC.
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;

// Both the plain and the zero-undef forms of 64-bit count-leading-zeros
// select the same LZCNT instruction.
def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
} // Predicates = [HasVIS3]
313 changes: 313 additions & 0 deletions llvm/test/CodeGen/SPARC/ctlz.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3

define i32 @i32_nopoison(i32 %x) nounwind {
; V9-LABEL: i32_nopoison:
; V9: ! %bb.0:
; V9-NEXT: cmp %o0, 0
; V9-NEXT: be %icc, .LBB0_2
; V9-NEXT: nop
; V9-NEXT: ! %bb.1: ! %cond.false
; V9-NEXT: srl %o0, 1, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 2, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 4, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 8, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 16, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: xor %o0, -1, %o0
; V9-NEXT: srl %o0, 1, %o1
; V9-NEXT: sethi 1398101, %o2
; V9-NEXT: or %o2, 341, %o2
; V9-NEXT: and %o1, %o2, %o1
; V9-NEXT: sub %o0, %o1, %o0
; V9-NEXT: sethi 838860, %o1
; V9-NEXT: or %o1, 819, %o1
; V9-NEXT: and %o0, %o1, %o2
; V9-NEXT: srl %o0, 2, %o0
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: add %o2, %o0, %o0
; V9-NEXT: srl %o0, 4, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sethi 246723, %o1
; V9-NEXT: or %o1, 783, %o1
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: sll %o0, 8, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sll %o0, 16, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: retl
; V9-NEXT: srl %o0, 24, %o0
; V9-NEXT: .LBB0_2:
; V9-NEXT: retl
; V9-NEXT: mov 32, %o0
;
; POPC-LABEL: i32_nopoison:
; POPC: ! %bb.0:
; POPC-NEXT: cmp %o0, 0
; POPC-NEXT: be %icc, .LBB0_2
; POPC-NEXT: nop
; POPC-NEXT: ! %bb.1: ! %cond.false
; POPC-NEXT: srl %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: srl %o0, 0, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
; POPC-NEXT: .LBB0_2:
; POPC-NEXT: retl
; POPC-NEXT: mov 32, %o0
;
; VIS3-LABEL: i32_nopoison:
; VIS3: ! %bb.0:
; VIS3-NEXT: cmp %o0, 0
; VIS3-NEXT: be %icc, .LBB0_2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The branch is redundant, but I guess there is nothing we can do with it right now. It must've been created too early.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those turn out to be controlled by the codegen parameters you mentioned above, it seems.

; VIS3-NEXT: nop
; VIS3-NEXT: ! %bb.1: ! %cond.false
; VIS3-NEXT: sllx %o0, 32, %o0
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
; VIS3-NEXT: .LBB0_2:
; VIS3-NEXT: retl
; VIS3-NEXT: mov 32, %o0
%ret = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
ret i32 %ret
}

; 32-bit ctlz called with the i1 flag set to true, so none of the expected
; sequences below contains a zero-input check. In the vis3 run the value is
; shifted left by 32 and then counted with a single lzcnt.
define i32 @i32_poison(i32 %x) nounwind {
; V9-LABEL: i32_poison:
; V9: ! %bb.0:
; V9-NEXT: srl %o0, 1, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 2, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 4, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 8, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srl %o0, 16, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: xor %o0, -1, %o0
; V9-NEXT: srl %o0, 1, %o1
; V9-NEXT: sethi 1398101, %o2
; V9-NEXT: or %o2, 341, %o2
; V9-NEXT: and %o1, %o2, %o1
; V9-NEXT: sub %o0, %o1, %o0
; V9-NEXT: sethi 838860, %o1
; V9-NEXT: or %o1, 819, %o1
; V9-NEXT: and %o0, %o1, %o2
; V9-NEXT: srl %o0, 2, %o0
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: add %o2, %o0, %o0
; V9-NEXT: srl %o0, 4, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sethi 246723, %o1
; V9-NEXT: or %o1, 783, %o1
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: sll %o0, 8, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sll %o0, 16, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: retl
; V9-NEXT: srl %o0, 24, %o0
;
; POPC-LABEL: i32_poison:
; POPC: ! %bb.0:
; POPC-NEXT: srl %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srl %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: srl %o0, 0, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
;
; VIS3-LABEL: i32_poison:
; VIS3: ! %bb.0:
; VIS3-NEXT: sllx %o0, 32, %o0
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
%ret = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
ret i32 %ret
}

define i64 @i64_nopoison(i64 %x) nounwind {
; V9-LABEL: i64_nopoison:
; V9: ! %bb.0:
; V9-NEXT: brz %o0, .LBB2_2
; V9-NEXT: nop
; V9-NEXT: ! %bb.1: ! %cond.false
; V9-NEXT: srlx %o0, 1, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 2, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 4, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 8, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 16, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 32, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: xor %o0, -1, %o0
; V9-NEXT: srlx %o0, 1, %o1
; V9-NEXT: sethi 1398101, %o2
; V9-NEXT: or %o2, 341, %o2
; V9-NEXT: sllx %o2, 32, %o3
; V9-NEXT: or %o3, %o2, %o2
; V9-NEXT: and %o1, %o2, %o1
; V9-NEXT: sub %o0, %o1, %o0
; V9-NEXT: sethi 838860, %o1
; V9-NEXT: or %o1, 819, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: and %o0, %o1, %o2
; V9-NEXT: srlx %o0, 2, %o0
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: add %o2, %o0, %o0
; V9-NEXT: srlx %o0, 4, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sethi 246723, %o1
; V9-NEXT: or %o1, 783, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: sethi 16448, %o1
; V9-NEXT: or %o1, 257, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: mulx %o0, %o1, %o0
; V9-NEXT: retl
; V9-NEXT: srlx %o0, 56, %o0
; V9-NEXT: .LBB2_2:
; V9-NEXT: retl
; V9-NEXT: mov 64, %o0
;
; POPC-LABEL: i64_nopoison:
; POPC: ! %bb.0:
; POPC-NEXT: brz %o0, .LBB2_2
; POPC-NEXT: nop
; POPC-NEXT: ! %bb.1: ! %cond.false
; POPC-NEXT: srlx %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 32, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
; POPC-NEXT: .LBB2_2:
; POPC-NEXT: retl
; POPC-NEXT: mov 64, %o0
;
; VIS3-LABEL: i64_nopoison:
; VIS3: ! %bb.0:
; VIS3-NEXT: brz %o0, .LBB2_2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is odd, I'd expect no branches here.

; VIS3-NEXT: nop
; VIS3-NEXT: ! %bb.1: ! %cond.false
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
; VIS3-NEXT: .LBB2_2:
; VIS3-NEXT: retl
; VIS3-NEXT: mov 64, %o0
%ret = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
ret i64 %ret
}

; 64-bit ctlz called with the i1 flag set to true, so none of the expected
; sequences below contains a zero-input check. In the vis3 run the whole
; function body is a single lzcnt in the delay slot of the return.
define i64 @i64_poison(i64 %x) nounwind {
; V9-LABEL: i64_poison:
; V9: ! %bb.0:
; V9-NEXT: srlx %o0, 1, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 2, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 4, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 8, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 16, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: srlx %o0, 32, %o1
; V9-NEXT: or %o0, %o1, %o0
; V9-NEXT: xor %o0, -1, %o0
; V9-NEXT: srlx %o0, 1, %o1
; V9-NEXT: sethi 1398101, %o2
; V9-NEXT: or %o2, 341, %o2
; V9-NEXT: sllx %o2, 32, %o3
; V9-NEXT: or %o3, %o2, %o2
; V9-NEXT: and %o1, %o2, %o1
; V9-NEXT: sub %o0, %o1, %o0
; V9-NEXT: sethi 838860, %o1
; V9-NEXT: or %o1, 819, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: and %o0, %o1, %o2
; V9-NEXT: srlx %o0, 2, %o0
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: add %o2, %o0, %o0
; V9-NEXT: srlx %o0, 4, %o1
; V9-NEXT: add %o0, %o1, %o0
; V9-NEXT: sethi 246723, %o1
; V9-NEXT: or %o1, 783, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: and %o0, %o1, %o0
; V9-NEXT: sethi 16448, %o1
; V9-NEXT: or %o1, 257, %o1
; V9-NEXT: sllx %o1, 32, %o2
; V9-NEXT: or %o2, %o1, %o1
; V9-NEXT: mulx %o0, %o1, %o0
; V9-NEXT: retl
; V9-NEXT: srlx %o0, 56, %o0
;
; POPC-LABEL: i64_poison:
; POPC: ! %bb.0:
; POPC-NEXT: srlx %o0, 1, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 2, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 4, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 8, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 16, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: srlx %o0, 32, %o1
; POPC-NEXT: or %o0, %o1, %o0
; POPC-NEXT: xor %o0, -1, %o0
; POPC-NEXT: retl
; POPC-NEXT: popc %o0, %o0
;
; VIS3-LABEL: i64_poison:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: lzcnt %o0, %o0
%ret = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
ret i64 %ret
}
Loading