@@ -324,20 +324,74 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
324
324
; ctpop32: returns the result of the llvm.ctpop.i32 intrinsic applied to %x.
;
; This block is shown in diff form: the expectation marked '-' is the old
; output (a tail branch to the __popcountsi2 library routine) and the
; expectations marked '+' are the new output, an inline sequence with no call.
;
; The expected inline sequence, as written in the directives below:
;   r0 = r0 - ((r0 >> 1) & 0x55555555)                 (mask from .LCPI22_0)
;   r0 = (r0 & 0x33333333) + ((r0 >> 2) & 0x33333333)  (mask from .LCPI22_3)
;   r0 = (r0 + (r0 >> 4)) & 0x0f0f0f0f                 (mask from .LCPI22_1)
;   r0 = (r0 * 0x01010101) >> 24                       (factor from .LCPI22_2)
; followed by the return (mov pc, lr) and the four-entry constant pool that
; holds the masks and the multiplier.
;
; NOTE(review): the directives are order-sensitive and look machine-generated;
; presumably they should be regenerated with the test-update script rather
; than edited by hand -- confirm against the header of the full test file.
define i32 @ctpop32 (i32 %x ) nounwind readnone {
325
325
; CHECK-LABEL: ctpop32:
326
326
; CHECK: @ %bb.0:
327
- ; CHECK-NEXT: b __popcountsi2
327
+ ; CHECK-NEXT: ldr r1, .LCPI22_0
328
+ ; CHECK-NEXT: ldr r2, .LCPI22_3
329
+ ; CHECK-NEXT: and r1, r1, r0, lsr #1
330
+ ; CHECK-NEXT: ldr r12, .LCPI22_1
331
+ ; CHECK-NEXT: sub r0, r0, r1
332
+ ; CHECK-NEXT: ldr r3, .LCPI22_2
333
+ ; CHECK-NEXT: and r1, r0, r2
334
+ ; CHECK-NEXT: and r0, r2, r0, lsr #2
335
+ ; CHECK-NEXT: add r0, r1, r0
336
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
337
+ ; CHECK-NEXT: and r0, r0, r12
338
+ ; CHECK-NEXT: mul r1, r0, r3
339
+ ; CHECK-NEXT: lsr r0, r1, #24
340
+ ; CHECK-NEXT: mov pc, lr
341
+ ; CHECK-NEXT: .p2align 2
342
+ ; CHECK-NEXT: @ %bb.1:
343
+ ; CHECK-NEXT: .LCPI22_0:
344
+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
345
+ ; CHECK-NEXT: .LCPI22_1:
346
+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
347
+ ; CHECK-NEXT: .LCPI22_2:
348
+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
349
+ ; CHECK-NEXT: .LCPI22_3:
350
+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
328
351
; The IR under test: a single tail call to the 32-bit ctpop intrinsic,
; whose result is returned unchanged.
%count = tail call i32 @llvm.ctpop.i32 (i32 %x )
329
352
ret i32 %count
330
353
}
331
354
332
355
; ctpop64: returns the result of the llvm.ctpop.i64 intrinsic applied to %x.
;
; This block is shown in diff form: the expectations marked '-' are the old
; output (save {r11, lr}, call the __popcountdi2 library routine, then
; asr r1, r0, #31) and the expectations marked '+' are the new output, an
; inline sequence with no call. The final "mov pc, lr" is unchanged context.
;
; The expected inline sequence runs the same steps on r0 and on r1,
; interleaved, with r4 as an extra scratch register (saved together with lr):
;   v = v - ((v >> 1) & 0x55555555)                 (mask from .LCPI23_0)
;   v = (v & 0x33333333) + ((v >> 2) & 0x33333333)  (mask from .LCPI23_3)
;   v = (v + (v >> 4)) & 0x0f0f0f0f                 (mask from .LCPI23_1, in lr)
;   v = (v * 0x01010101) >> 24                      (factor from .LCPI23_2, in r12)
; The two per-register results are then added into r0 and r1 is set to 0.
;
; NOTE(review): presumably r0/r1 carry the two 32-bit halves of the i64
; argument and of the i64 result (so r1 = 0 is the upper half of the count);
; that mapping comes from the target calling convention and is not visible
; in this excerpt -- confirm.
; NOTE(review): the directives are order-sensitive and look machine-generated;
; presumably they should be regenerated with the test-update script rather
; than edited by hand -- confirm against the header of the full test file.
define i64 @ctpop64 (i64 %x ) nounwind readnone {
333
356
; CHECK-LABEL: ctpop64:
334
357
; CHECK: @ %bb.0:
335
- ; CHECK-NEXT: .save {r11, lr}
336
- ; CHECK-NEXT: push {r11, lr}
337
- ; CHECK-NEXT: bl __popcountdi2
338
- ; CHECK-NEXT: asr r1, r0, #31
339
- ; CHECK-NEXT: pop {r11, lr}
358
+ ; CHECK-NEXT: .save {r4, lr}
359
+ ; CHECK-NEXT: push {r4, lr}
360
+ ; CHECK-NEXT: ldr r2, .LCPI23_0
361
+ ; CHECK-NEXT: ldr r3, .LCPI23_3
362
+ ; CHECK-NEXT: and r4, r2, r0, lsr #1
363
+ ; CHECK-NEXT: and r2, r2, r1, lsr #1
364
+ ; CHECK-NEXT: sub r0, r0, r4
365
+ ; CHECK-NEXT: sub r1, r1, r2
366
+ ; CHECK-NEXT: and r4, r0, r3
367
+ ; CHECK-NEXT: and r2, r1, r3
368
+ ; CHECK-NEXT: and r0, r3, r0, lsr #2
369
+ ; CHECK-NEXT: and r1, r3, r1, lsr #2
370
+ ; CHECK-NEXT: add r0, r4, r0
371
+ ; CHECK-NEXT: ldr lr, .LCPI23_1
372
+ ; CHECK-NEXT: add r1, r2, r1
373
+ ; CHECK-NEXT: ldr r12, .LCPI23_2
374
+ ; CHECK-NEXT: add r0, r0, r0, lsr #4
375
+ ; CHECK-NEXT: and r0, r0, lr
376
+ ; CHECK-NEXT: add r1, r1, r1, lsr #4
377
+ ; CHECK-NEXT: mul r2, r0, r12
378
+ ; CHECK-NEXT: and r0, r1, lr
379
+ ; CHECK-NEXT: mul r1, r0, r12
380
+ ; CHECK-NEXT: lsr r0, r2, #24
381
+ ; CHECK-NEXT: add r0, r0, r1, lsr #24
382
+ ; CHECK-NEXT: mov r1, #0
383
+ ; CHECK-NEXT: pop {r4, lr}
340
384
; CHECK-NEXT: mov pc, lr
385
+ ; CHECK-NEXT: .p2align 2
386
+ ; CHECK-NEXT: @ %bb.1:
387
+ ; CHECK-NEXT: .LCPI23_0:
388
+ ; CHECK-NEXT: .long 1431655765 @ 0x55555555
389
+ ; CHECK-NEXT: .LCPI23_1:
390
+ ; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
391
+ ; CHECK-NEXT: .LCPI23_2:
392
+ ; CHECK-NEXT: .long 16843009 @ 0x1010101
393
+ ; CHECK-NEXT: .LCPI23_3:
394
+ ; CHECK-NEXT: .long 858993459 @ 0x33333333
341
395
; The IR under test: a single tail call to the 64-bit ctpop intrinsic,
; whose result is returned unchanged.
%count = tail call i64 @llvm.ctpop.i64 (i64 %x )
342
396
ret i64 %count
343
397
}
0 commit comments