@@ -3329,9 +3329,7 @@ define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3329
3329
; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
3330
3330
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
3331
3331
; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3332
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
3333
3332
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
3334
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3335
3333
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
3336
3334
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3337
3335
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
@@ -3486,10 +3484,8 @@ define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt)
3486
3484
; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
3487
3485
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3488
3486
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3489
- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3490
3487
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
3491
3488
; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
3492
- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3493
3489
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3494
3490
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3495
3491
; GFX6-NEXT: ; return to shader part epilog
@@ -3793,20 +3789,16 @@ define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3793
3789
; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3794
3790
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3795
3791
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3796
- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
3797
3792
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3798
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3799
3793
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3800
3794
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3801
3795
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3802
3796
; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3803
3797
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3804
3798
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3805
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3806
3799
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3807
3800
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3808
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3809
- ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3801
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3810
3802
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3811
3803
; GFX6-NEXT: s_setpc_b64 s[30:31]
3812
3804
;
@@ -3942,18 +3934,14 @@ define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
3942
3934
; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3943
3935
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3944
3936
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3945
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3946
3937
; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
3947
3938
; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3948
- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3949
3939
; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3950
3940
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3951
3941
; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3952
3942
; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3953
3943
; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3954
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3955
3944
; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3956
- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3957
3945
; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
3958
3946
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3959
3947
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
@@ -4450,28 +4438,22 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4450
4438
; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
4451
4439
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4452
4440
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4453
- ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
4454
4441
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4455
- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4456
4442
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
4457
4443
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4458
4444
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
4459
4445
; GFX6-NEXT: v_and_b32_e32 v3, 15, v7
4460
4446
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
4461
4447
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4462
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4463
4448
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
4464
4449
; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4465
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4466
- ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4450
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4467
4451
; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4468
4452
; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
4469
4453
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
4470
4454
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4471
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4472
4455
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2
4473
4456
; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4474
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4475
4457
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4476
4458
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4477
4459
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -4790,37 +4772,29 @@ define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4790
4772
; GFX6-NEXT: v_and_b32_e32 v12, 15, v8
4791
4773
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4792
4774
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4793
- ; GFX6-NEXT: v_and_b32_e32 v12, 0xffff, v12
4794
4775
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4795
- ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4796
4776
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0
4797
4777
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4798
4778
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
4799
4779
; GFX6-NEXT: v_and_b32_e32 v4, 15, v9
4800
4780
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9
4801
4781
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4802
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4803
4782
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
4804
4783
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4805
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
4806
- ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4784
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4807
4785
; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
4808
4786
; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
4809
4787
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
4810
4788
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
4811
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4812
4789
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
4813
4790
; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
4814
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
4815
4791
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4816
4792
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
4817
4793
; GFX6-NEXT: v_and_b32_e32 v4, 15, v11
4818
4794
; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
4819
4795
; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
4820
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4821
4796
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
4822
4797
; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
4823
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
4824
4798
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4825
4799
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
4826
4800
; GFX6-NEXT: s_setpc_b64 s[30:31]
0 commit comments