Open
Description
This is an enhancement request. While u16, u32 and u64 numbers get divided by a small constant divisor using a fast algorithm, the same isn't true for u128 numbers:
fn digits_sum0(mut n: u16) -> u16 {
let mut total = 0;
while n != 0 {
total += n % 10;
n /= 10;
}
total
}
fn digits_sum1(mut n: u32) -> u32 {
let mut total = 0;
while n != 0 {
total += n % 10;
n /= 10;
}
total
}
fn digits_sum2(mut n: u64) -> u64 {
let mut total = 0;
while n != 0 {
total += n % 10;
n /= 10;
}
total
}
fn digits_sum3(mut n: u128) -> u128 {
let mut total = 0;
while n != 0 {
total += n % 10;
n /= 10;
}
total
}
Generate asm (with -O):
digits_sum0:
xor eax, eax
test di, di
je .LBB0_2
.LBB0_1:
movzx ecx, di
imul edx, ecx, 52429
shr edx, 19
lea esi, [rdx + rdx]
lea esi, [rsi + 4*rsi]
sub edi, esi
add eax, edi
mov edi, edx
cmp ecx, 10
jae .LBB0_1
.LBB0_2:
ret
digits_sum1:
xor eax, eax
test edi, edi
je .LBB0_3
mov r8d, 3435973837
.LBB0_2:
mov edx, edi
imul rdx, r8
shr rdx, 35
lea esi, [rdx + rdx]
lea esi, [rsi + 4*rsi]
mov ecx, edi
sub ecx, esi
add eax, ecx
cmp edi, 10
mov edi, edx
jae .LBB0_2
.LBB0_3:
ret
digits_sum2:
xor ecx, ecx
test rdi, rdi
je .LBB1_3
movabs r8, -3689348814741910323
.LBB1_2:
mov rax, rdi
mul r8
shr rdx, 3
lea rax, [rdx + rdx]
lea rax, [rax + 4*rax]
mov rsi, rdi
sub rsi, rax
add rcx, rsi
cmp rdi, 10
mov rdi, rdx
jae .LBB1_2
.LBB1_3:
mov rax, rcx
ret
digits_sum3:
push r15
push r14
push r13
push r12
push rbx
mov rax, rdi
or rax, rsi
je .LBB2_1
mov rbx, rsi
mov r15, rdi
xor r14d, r14d
mov r13d, 10
xor r12d, r12d
.LBB2_4:
mov edx, 10
xor ecx, ecx
mov rdi, r15
mov rsi, rbx
call __udivti3@PLT
mov rcx, rax
mov rsi, rdx
mul r13
lea rdi, [rsi + 4*rsi]
lea rdx, [rdx + 2*rdi]
mov rdi, r15
sub rdi, rax
mov rax, rbx
sbb rax, rdx
add r14, rdi
adc r12, rax
cmp r15, 10
sbb rbx, 0
mov r15, rcx
mov rbx, rsi
jae .LBB2_4
jmp .LBB2_2
.LBB2_1:
xor r14d, r14d
xor r12d, r12d
.LBB2_2:
mov rax, r14
mov rdx, r12
pop rbx
pop r12
pop r13
pop r14
pop r15
ret
The faster algorithm is short enough and it could be added to rustc:
http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
http://ridiculousfish.com/blog/posts/labor-of-division-episode-ii.html
http://libdivide.com/
http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html