@@ -82,26 +82,25 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
82
82
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
83
83
; CHECK-NEXT: s_mov_b32 s4, 0x800000
84
84
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
85
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
85
+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
86
86
; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1
87
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
88
- ; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
87
+ ; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
89
88
; CHECK-NEXT: v_log_f32_e32 v3, v3
90
- ; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
91
89
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
90
+ ; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
92
91
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
93
92
; CHECK-NEXT: v_sub_f32_e32 v2, v3, v2
94
- ; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
95
93
; CHECK-NEXT: s_mov_b32 s4, 0xc2fc0000
94
+ ; CHECK-NEXT: v_mul_f32_e32 v3, v2, v4
96
95
; CHECK-NEXT: v_mov_b32_e32 v5, 0x42800000
97
96
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
98
97
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
99
98
; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
100
99
; CHECK-NEXT: v_exp_f32_e32 v2, v2
101
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
102
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
100
+ ; CHECK-NEXT: v_not_b32_e32 v3, 63
101
+ ; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
103
102
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
104
- ; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
103
+ ; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
105
104
; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
106
105
; CHECK-NEXT: s_setpc_b64 s[30:31]
107
106
%y = sitofp i32 %y.i to float
@@ -228,9 +227,8 @@ define float @test_powr_fast_f32(float %x, float %y) {
228
227
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229
228
; CHECK-NEXT: s_mov_b32 s4, 0x800000
230
229
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
231
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
232
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
233
- ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v3
230
+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
231
+ ; CHECK-NEXT: v_ldexp_f32 v0, v0, v3
234
232
; CHECK-NEXT: v_log_f32_e32 v0, v0
235
233
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
236
234
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
@@ -242,9 +240,9 @@ define float @test_powr_fast_f32(float %x, float %y) {
242
240
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
243
241
; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
244
242
; CHECK-NEXT: v_exp_f32_e32 v0, v0
245
- ; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
246
- ; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
247
- ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
243
+ ; CHECK-NEXT: v_not_b32_e32 v1, 63
244
+ ; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
245
+ ; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
248
246
; CHECK-NEXT: s_setpc_b64 s[30:31]
249
247
%powr = tail call fast float @_Z4powrff(float %x, float %y)
250
248
ret float %powr
@@ -368,9 +366,8 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
368
366
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369
367
; CHECK-NEXT: s_mov_b32 s4, 0x800000
370
368
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
371
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
372
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
373
- ; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
369
+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
370
+ ; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
374
371
; CHECK-NEXT: v_log_f32_e32 v3, v3
375
372
; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1
376
373
; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000
@@ -383,10 +380,10 @@ define float @test_pown_fast_f32(float %x, i32 %y) {
383
380
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
384
381
; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3
385
382
; CHECK-NEXT: v_exp_f32_e32 v2, v2
386
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000
387
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
383
+ ; CHECK-NEXT: v_not_b32_e32 v3, 63
384
+ ; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
388
385
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1
389
- ; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3
386
+ ; CHECK-NEXT: v_ldexp_f32 v2, v2, v3
390
387
; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2
391
388
; CHECK-NEXT: s_setpc_b64 s[30:31]
392
389
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
@@ -511,9 +508,8 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
511
508
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512
509
; CHECK-NEXT: s_mov_b32 s4, 0x800000
513
510
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
514
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
515
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
516
- ; CHECK-NEXT: v_mul_f32_e64 v0, |v0|, v3
511
+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
512
+ ; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3
517
513
; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1
518
514
; CHECK-NEXT: v_log_f32_e32 v0, v0
519
515
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
@@ -527,9 +523,9 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
527
523
; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc
528
524
; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2
529
525
; CHECK-NEXT: v_exp_f32_e32 v0, v0
530
- ; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000
531
- ; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
532
- ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
526
+ ; CHECK-NEXT: v_not_b32_e32 v1, 63
527
+ ; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
528
+ ; CHECK-NEXT: v_ldexp_f32 v0, v0, v1
533
529
; CHECK-NEXT: s_setpc_b64 s[30:31]
534
530
%y = shl i32 %y.arg, 1
535
531
%call = tail call fast float @_Z4pownfi(float %x, i32 %y)
@@ -651,9 +647,8 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
651
647
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652
648
; CHECK-NEXT: s_mov_b32 s4, 0x800000
653
649
; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
654
- ; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000
655
- ; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
656
- ; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3
650
+ ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
651
+ ; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3
657
652
; CHECK-NEXT: v_or_b32_e32 v1, 1, v1
658
653
; CHECK-NEXT: v_log_f32_e32 v3, v3
659
654
; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1
@@ -667,10 +662,10 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
667
662
; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
668
663
; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3
669
664
; CHECK-NEXT: v_exp_f32_e32 v1, v1
670
- ; CHECK-NEXT: v_mov_b32_e32 v2, 0x1f800000
671
- ; CHECK-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
665
+ ; CHECK-NEXT: v_not_b32_e32 v2, 63
666
+ ; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
672
667
; CHECK-NEXT: s_brev_b32 s4, 1
673
- ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v2
668
+ ; CHECK-NEXT: v_ldexp_f32 v1, v1, v2
674
669
; CHECK-NEXT: v_and_or_b32 v0, v0, s4, v1
675
670
; CHECK-NEXT: s_setpc_b64 s[30:31]
676
671
%y = or i32 %y.arg, 1
0 commit comments