@@ -733,3 +733,238 @@ entry:
733
733
%0 = load i32 , ptr inttoptr (i64 2147481600 to ptr )
734
734
ret i32 %0
735
735
}
736
+
737
+ %struct.S = type { i64 , i64 } ; two i64 fields; @fold_addi_from_different_bb addresses field index 1 of an element of this type
738
+
739
+ define i64 @fold_addi_from_different_bb (i64 %k , i64 %n , ptr %a ) nounwind { ; returns the sum of %n loads of field 1 of element %k of %a (0 when %n <= 0), calling @f(%a) before each load
740
+ ; RV32I-LABEL: fold_addi_from_different_bb:
741
+ ; RV32I: # %bb.0: # %entry
742
+ ; RV32I-NEXT: addi sp, sp, -48
743
+ ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
744
+ ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
745
+ ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
746
+ ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
747
+ ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
748
+ ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
749
+ ; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
750
+ ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
751
+ ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
752
+ ; RV32I-NEXT: mv s0, a4
753
+ ; RV32I-NEXT: mv s1, a3
754
+ ; RV32I-NEXT: mv s2, a2
755
+ ; RV32I-NEXT: beqz a3, .LBB20_3
756
+ ; RV32I-NEXT: # %bb.1: # %entry
757
+ ; RV32I-NEXT: slti a1, s1, 0
758
+ ; RV32I-NEXT: beqz a1, .LBB20_4
759
+ ; RV32I-NEXT: .LBB20_2:
760
+ ; RV32I-NEXT: li s3, 0
761
+ ; RV32I-NEXT: li s4, 0
762
+ ; RV32I-NEXT: j .LBB20_6
763
+ ; RV32I-NEXT: .LBB20_3:
764
+ ; RV32I-NEXT: seqz a1, s2
765
+ ; RV32I-NEXT: bnez a1, .LBB20_2
766
+ ; RV32I-NEXT: .LBB20_4: # %for.body.lr.ph
767
+ ; RV32I-NEXT: li s5, 0
768
+ ; RV32I-NEXT: li s6, 0
769
+ ; RV32I-NEXT: li s3, 0
770
+ ; RV32I-NEXT: li s4, 0
771
+ ; RV32I-NEXT: slli a0, a0, 4
772
+ ; RV32I-NEXT: add a0, s0, a0
773
+ ; RV32I-NEXT: addi s7, a0, 8
774
+ ; RV32I-NEXT: .LBB20_5: # %for.body
775
+ ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
776
+ ; RV32I-NEXT: mv a0, s0
777
+ ; RV32I-NEXT: call f@plt
778
+ ; RV32I-NEXT: lw a0, 4(s7)
779
+ ; RV32I-NEXT: lw a1, 0(s7)
780
+ ; RV32I-NEXT: add a0, a0, s4
781
+ ; RV32I-NEXT: add s3, a1, s3
782
+ ; RV32I-NEXT: sltu s4, s3, a1
783
+ ; RV32I-NEXT: addi s5, s5, 1
784
+ ; RV32I-NEXT: seqz a1, s5
785
+ ; RV32I-NEXT: add s6, s6, a1
786
+ ; RV32I-NEXT: xor a1, s5, s2
787
+ ; RV32I-NEXT: xor a2, s6, s1
788
+ ; RV32I-NEXT: or a1, a1, a2
789
+ ; RV32I-NEXT: add s4, a0, s4
790
+ ; RV32I-NEXT: bnez a1, .LBB20_5
791
+ ; RV32I-NEXT: .LBB20_6: # %for.cond.cleanup
792
+ ; RV32I-NEXT: mv a0, s3
793
+ ; RV32I-NEXT: mv a1, s4
794
+ ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
795
+ ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
796
+ ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
797
+ ; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
798
+ ; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
799
+ ; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
800
+ ; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
801
+ ; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
802
+ ; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
803
+ ; RV32I-NEXT: addi sp, sp, 48
804
+ ; RV32I-NEXT: ret
805
+ ;
806
+ ; RV32I-MEDIUM-LABEL: fold_addi_from_different_bb:
807
+ ; RV32I-MEDIUM: # %bb.0: # %entry
808
+ ; RV32I-MEDIUM-NEXT: addi sp, sp, -48
809
+ ; RV32I-MEDIUM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
810
+ ; RV32I-MEDIUM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
811
+ ; RV32I-MEDIUM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
812
+ ; RV32I-MEDIUM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
813
+ ; RV32I-MEDIUM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
814
+ ; RV32I-MEDIUM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
815
+ ; RV32I-MEDIUM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
816
+ ; RV32I-MEDIUM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
817
+ ; RV32I-MEDIUM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
818
+ ; RV32I-MEDIUM-NEXT: mv s0, a4
819
+ ; RV32I-MEDIUM-NEXT: mv s1, a3
820
+ ; RV32I-MEDIUM-NEXT: mv s2, a2
821
+ ; RV32I-MEDIUM-NEXT: beqz a3, .LBB20_3
822
+ ; RV32I-MEDIUM-NEXT: # %bb.1: # %entry
823
+ ; RV32I-MEDIUM-NEXT: slti a1, s1, 0
824
+ ; RV32I-MEDIUM-NEXT: beqz a1, .LBB20_4
825
+ ; RV32I-MEDIUM-NEXT: .LBB20_2:
826
+ ; RV32I-MEDIUM-NEXT: li s3, 0
827
+ ; RV32I-MEDIUM-NEXT: li s4, 0
828
+ ; RV32I-MEDIUM-NEXT: j .LBB20_6
829
+ ; RV32I-MEDIUM-NEXT: .LBB20_3:
830
+ ; RV32I-MEDIUM-NEXT: seqz a1, s2
831
+ ; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_2
832
+ ; RV32I-MEDIUM-NEXT: .LBB20_4: # %for.body.lr.ph
833
+ ; RV32I-MEDIUM-NEXT: li s5, 0
834
+ ; RV32I-MEDIUM-NEXT: li s6, 0
835
+ ; RV32I-MEDIUM-NEXT: li s3, 0
836
+ ; RV32I-MEDIUM-NEXT: li s4, 0
837
+ ; RV32I-MEDIUM-NEXT: slli a0, a0, 4
838
+ ; RV32I-MEDIUM-NEXT: add a0, s0, a0
839
+ ; RV32I-MEDIUM-NEXT: addi s7, a0, 8
840
+ ; RV32I-MEDIUM-NEXT: .LBB20_5: # %for.body
841
+ ; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
842
+ ; RV32I-MEDIUM-NEXT: mv a0, s0
843
+ ; RV32I-MEDIUM-NEXT: call f@plt
844
+ ; RV32I-MEDIUM-NEXT: lw a0, 4(s7)
845
+ ; RV32I-MEDIUM-NEXT: lw a1, 0(s7)
846
+ ; RV32I-MEDIUM-NEXT: add a0, a0, s4
847
+ ; RV32I-MEDIUM-NEXT: add s3, a1, s3
848
+ ; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
849
+ ; RV32I-MEDIUM-NEXT: addi s5, s5, 1
850
+ ; RV32I-MEDIUM-NEXT: seqz a1, s5
851
+ ; RV32I-MEDIUM-NEXT: add s6, s6, a1
852
+ ; RV32I-MEDIUM-NEXT: xor a1, s5, s2
853
+ ; RV32I-MEDIUM-NEXT: xor a2, s6, s1
854
+ ; RV32I-MEDIUM-NEXT: or a1, a1, a2
855
+ ; RV32I-MEDIUM-NEXT: add s4, a0, s4
856
+ ; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_5
857
+ ; RV32I-MEDIUM-NEXT: .LBB20_6: # %for.cond.cleanup
858
+ ; RV32I-MEDIUM-NEXT: mv a0, s3
859
+ ; RV32I-MEDIUM-NEXT: mv a1, s4
860
+ ; RV32I-MEDIUM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
861
+ ; RV32I-MEDIUM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
862
+ ; RV32I-MEDIUM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
863
+ ; RV32I-MEDIUM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
864
+ ; RV32I-MEDIUM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
865
+ ; RV32I-MEDIUM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
866
+ ; RV32I-MEDIUM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
867
+ ; RV32I-MEDIUM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
868
+ ; RV32I-MEDIUM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
869
+ ; RV32I-MEDIUM-NEXT: addi sp, sp, 48
870
+ ; RV32I-MEDIUM-NEXT: ret
871
+ ;
872
+ ; RV64I-LABEL: fold_addi_from_different_bb:
873
+ ; RV64I: # %bb.0: # %entry
874
+ ; RV64I-NEXT: addi sp, sp, -48
875
+ ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
876
+ ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
877
+ ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
878
+ ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
879
+ ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
880
+ ; RV64I-NEXT: blez a1, .LBB20_3
881
+ ; RV64I-NEXT: # %bb.1: # %for.body.lr.ph
882
+ ; RV64I-NEXT: mv s0, a2
883
+ ; RV64I-NEXT: mv s1, a1
884
+ ; RV64I-NEXT: li s2, 0
885
+ ; RV64I-NEXT: slli a0, a0, 4
886
+ ; RV64I-NEXT: add a0, a2, a0
887
+ ; RV64I-NEXT: addi s3, a0, 8
888
+ ; RV64I-NEXT: .LBB20_2: # %for.body
889
+ ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
890
+ ; RV64I-NEXT: mv a0, s0
891
+ ; RV64I-NEXT: call f@plt
892
+ ; RV64I-NEXT: ld a0, 0(s3)
893
+ ; RV64I-NEXT: addi s1, s1, -1
894
+ ; RV64I-NEXT: add s2, a0, s2
895
+ ; RV64I-NEXT: bnez s1, .LBB20_2
896
+ ; RV64I-NEXT: j .LBB20_4
897
+ ; RV64I-NEXT: .LBB20_3:
898
+ ; RV64I-NEXT: li s2, 0
899
+ ; RV64I-NEXT: .LBB20_4: # %for.cond.cleanup
900
+ ; RV64I-NEXT: mv a0, s2
901
+ ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
902
+ ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
903
+ ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
904
+ ; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
905
+ ; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
906
+ ; RV64I-NEXT: addi sp, sp, 48
907
+ ; RV64I-NEXT: ret
908
+ ;
909
+ ; RV64I-MEDIUM-LABEL: fold_addi_from_different_bb:
910
+ ; RV64I-MEDIUM: # %bb.0: # %entry
911
+ ; RV64I-MEDIUM-NEXT: addi sp, sp, -48
912
+ ; RV64I-MEDIUM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
913
+ ; RV64I-MEDIUM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
914
+ ; RV64I-MEDIUM-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
915
+ ; RV64I-MEDIUM-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
916
+ ; RV64I-MEDIUM-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
917
+ ; RV64I-MEDIUM-NEXT: blez a1, .LBB20_3
918
+ ; RV64I-MEDIUM-NEXT: # %bb.1: # %for.body.lr.ph
919
+ ; RV64I-MEDIUM-NEXT: mv s0, a2
920
+ ; RV64I-MEDIUM-NEXT: mv s1, a1
921
+ ; RV64I-MEDIUM-NEXT: li s2, 0
922
+ ; RV64I-MEDIUM-NEXT: slli a0, a0, 4
923
+ ; RV64I-MEDIUM-NEXT: add a0, a2, a0
924
+ ; RV64I-MEDIUM-NEXT: addi s3, a0, 8
925
+ ; RV64I-MEDIUM-NEXT: .LBB20_2: # %for.body
926
+ ; RV64I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
927
+ ; RV64I-MEDIUM-NEXT: mv a0, s0
928
+ ; RV64I-MEDIUM-NEXT: call f@plt
929
+ ; RV64I-MEDIUM-NEXT: ld a0, 0(s3)
930
+ ; RV64I-MEDIUM-NEXT: addi s1, s1, -1
931
+ ; RV64I-MEDIUM-NEXT: add s2, a0, s2
932
+ ; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2
933
+ ; RV64I-MEDIUM-NEXT: j .LBB20_4
934
+ ; RV64I-MEDIUM-NEXT: .LBB20_3:
935
+ ; RV64I-MEDIUM-NEXT: li s2, 0
936
+ ; RV64I-MEDIUM-NEXT: .LBB20_4: # %for.cond.cleanup
937
+ ; RV64I-MEDIUM-NEXT: mv a0, s2
938
+ ; RV64I-MEDIUM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
939
+ ; RV64I-MEDIUM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
940
+ ; RV64I-MEDIUM-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
941
+ ; RV64I-MEDIUM-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
942
+ ; RV64I-MEDIUM-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
943
+ ; RV64I-MEDIUM-NEXT: addi sp, sp, 48
944
+ ; RV64I-MEDIUM-NEXT: ret
945
+ entry:
946
+ %cmp4 = icmp sgt i64 %n , 0 ; signed test: the loop runs only when %n > 0
947
+ br i1 %cmp4 , label %for.body.lr.ph , label %for.cond.cleanup
948
+
949
+ for.body.lr.ph: ; preds = %entry
950
+ ; TODO: expanding this GEP yields an `addi ..., 8` in this block, while its only user (the load)
951
+ ; is in %for.body; that `addi` should be folded into the load's immediate offset in the loop body.
952
+ %y = getelementptr inbounds %struct.S , ptr %a , i64 %k , i32 1 ; address of field 1 of element %k of %a; computed once, before the loop is entered
953
+ br label %for.body
954
+
955
+ for.cond.cleanup: ; preds = %for.body, %entry
956
+ %s.0.lcssa = phi i64 [ 0 , %entry ], [ %add , %for.body ] ; 0 when the loop was skipped, otherwise the final accumulated sum
957
+ ret i64 %s.0.lcssa
958
+
959
+ for.body: ; preds = %for.body.lr.ph, %for.body
960
+ %i.06 = phi i64 [ 0 , %for.body.lr.ph ], [ %inc , %for.body ]
961
+ %s.05 = phi i64 [ 0 , %for.body.lr.ph ], [ %add , %for.body ]
962
+ call void @f (ptr %a ) ; called on every iteration; NOTE(review): presumably keeps the load below from being hoisted out of the loop - confirm
963
+ %0 = load i64 , ptr %y , align 8 ; the only use of %y, in a different basic block from the GEP that defines it
964
+ %add = add nsw i64 %0 , %s.05
965
+ %inc = add nuw nsw i64 %i.06 , 1
966
+ %exitcond.not = icmp eq i64 %inc , %n ; exit once %n iterations have run
967
+ br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
968
+ }
969
+
970
+ declare void @f (ptr ) ; declaration only: no body is visible here; called from the loop in @fold_addi_from_different_bb
0 commit comments