Skip to content

Commit 96e3cd8

Browse files
author
Eli Friedman
committed
[ARM] Lower llvm.ctlz.i32 to a libcall when clz is not available.
The inline sequence is very long (about 70 bytes on Thumb1), so it's not really a good idea to inline it, especially when optimizing for size.

Differential Revision: https://reviews.llvm.org/D47917

llvm-svn: 340458
1 parent 20f9cd8 commit 96e3cd8

File tree

4 files changed

+26
-4
lines changed

4 files changed

+26
-4
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ HANDLE_LIBCALL(UDIVREM_I64, nullptr)
8383
HANDLE_LIBCALL(UDIVREM_I128, nullptr)
8484
HANDLE_LIBCALL(NEG_I32, "__negsi2")
8585
HANDLE_LIBCALL(NEG_I64, "__negdi2")
86+
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
87+
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
88+
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
8689

8790
// Floating-point
8891
HANDLE_LIBCALL(ADD_F32, "__addsf3")

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4262,6 +4262,21 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
42624262
RTLIB::MUL_I16, RTLIB::MUL_I32,
42634263
RTLIB::MUL_I64, RTLIB::MUL_I128));
42644264
break;
4265+
case ISD::CTLZ_ZERO_UNDEF:
4266+
switch (Node->getSimpleValueType(0).SimpleTy) {
4267+
default:
4268+
llvm_unreachable("LibCall explicitly requested, but not available");
4269+
case MVT::i32:
4270+
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
4271+
break;
4272+
case MVT::i64:
4273+
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
4274+
break;
4275+
case MVT::i128:
4276+
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
4277+
break;
4278+
}
4279+
break;
42654280
}
42664281

42674282
// Replace the original node with the legalized result.

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -850,8 +850,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
850850
}
851851
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
852852
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
853-
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
853+
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
854854
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
855+
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
856+
}
855857

856858
// @llvm.readcyclecounter requires the Performance Monitors extension.
857859
// Default to the 0 expansion on unsupported platforms.

llvm/test/CodeGen/ARM/clz.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s
1+
; RUN: llc -mtriple=arm-eabi -mattr=+v5t %s -o - | FileCheck %s -check-prefixes=CHECK,INLINE
2+
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALL
23

34
declare i32 @llvm.ctlz.i32(i32, i1)
45

56
define i32 @test(i32 %x) {
6-
; CHECK: test
7-
; CHECK: clz r0, r0
7+
; CHECK-LABEL: test
8+
; INLINE: clz r0, r0
9+
; LIBCALL: b __clzsi2
810
%tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
911
ret i32 %tmp.1
1012
}

0 commit comments

Comments
 (0)