Skip to content

Commit 9b51200

Browse files
authored
[RISCV] Add a test demonstrating a missed opportunity to fold `addi` into a load/store offset (#67022)
The patch to address this will be in a separate PR. A possible fix: https://github.com/llvm/llvm-project/pull/67024/files
1 parent 0ff9625 commit 9b51200

File tree

1 file changed

+235
-0
lines changed

1 file changed

+235
-0
lines changed

llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,3 +733,238 @@ entry:
733733
%0 = load i32, ptr inttoptr (i64 2147481600 to ptr)
734734
ret i32 %0
735735
}
736+
737+
; Aggregate of two i64 fields; fold_addi_from_different_bb addresses its
; field 1 through a getelementptr.
%struct.S = type { i64, i64 }
738+
739+
; The address of field 1 of %a[%k] (%y) is computed once in %for.body.lr.ph,
; while the only load from it sits in a different basic block, %for.body.
; The loop is entered only if %n > 0; each iteration calls @f(%a) and adds the
; i64 loaded from %y to a running sum. The function returns that sum, or 0
; when the loop is skipped.
; The expected output below still materializes the address with a separate
; `addi ..., 8` ahead of the loop and loads through it with offset 0.
define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind {
740+
; RV32I-LABEL: fold_addi_from_different_bb:
741+
; RV32I: # %bb.0: # %entry
742+
; RV32I-NEXT: addi sp, sp, -48
743+
; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
744+
; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
745+
; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
746+
; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
747+
; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
748+
; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
749+
; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
750+
; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
751+
; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
752+
; RV32I-NEXT: mv s0, a4
753+
; RV32I-NEXT: mv s1, a3
754+
; RV32I-NEXT: mv s2, a2
755+
; RV32I-NEXT: beqz a3, .LBB20_3
756+
; RV32I-NEXT: # %bb.1: # %entry
757+
; RV32I-NEXT: slti a1, s1, 0
758+
; RV32I-NEXT: beqz a1, .LBB20_4
759+
; RV32I-NEXT: .LBB20_2:
760+
; RV32I-NEXT: li s3, 0
761+
; RV32I-NEXT: li s4, 0
762+
; RV32I-NEXT: j .LBB20_6
763+
; RV32I-NEXT: .LBB20_3:
764+
; RV32I-NEXT: seqz a1, s2
765+
; RV32I-NEXT: bnez a1, .LBB20_2
766+
; RV32I-NEXT: .LBB20_4: # %for.body.lr.ph
767+
; RV32I-NEXT: li s5, 0
768+
; RV32I-NEXT: li s6, 0
769+
; RV32I-NEXT: li s3, 0
770+
; RV32I-NEXT: li s4, 0
771+
; RV32I-NEXT: slli a0, a0, 4
772+
; RV32I-NEXT: add a0, s0, a0
773+
; RV32I-NEXT: addi s7, a0, 8
774+
; RV32I-NEXT: .LBB20_5: # %for.body
775+
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
776+
; RV32I-NEXT: mv a0, s0
777+
; RV32I-NEXT: call f@plt
778+
; RV32I-NEXT: lw a0, 4(s7)
779+
; RV32I-NEXT: lw a1, 0(s7)
780+
; RV32I-NEXT: add a0, a0, s4
781+
; RV32I-NEXT: add s3, a1, s3
782+
; RV32I-NEXT: sltu s4, s3, a1
783+
; RV32I-NEXT: addi s5, s5, 1
784+
; RV32I-NEXT: seqz a1, s5
785+
; RV32I-NEXT: add s6, s6, a1
786+
; RV32I-NEXT: xor a1, s5, s2
787+
; RV32I-NEXT: xor a2, s6, s1
788+
; RV32I-NEXT: or a1, a1, a2
789+
; RV32I-NEXT: add s4, a0, s4
790+
; RV32I-NEXT: bnez a1, .LBB20_5
791+
; RV32I-NEXT: .LBB20_6: # %for.cond.cleanup
792+
; RV32I-NEXT: mv a0, s3
793+
; RV32I-NEXT: mv a1, s4
794+
; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
795+
; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
796+
; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
797+
; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
798+
; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
799+
; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
800+
; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
801+
; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
802+
; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
803+
; RV32I-NEXT: addi sp, sp, 48
804+
; RV32I-NEXT: ret
805+
;
806+
; RV32I-MEDIUM-LABEL: fold_addi_from_different_bb:
807+
; RV32I-MEDIUM: # %bb.0: # %entry
808+
; RV32I-MEDIUM-NEXT: addi sp, sp, -48
809+
; RV32I-MEDIUM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
810+
; RV32I-MEDIUM-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
811+
; RV32I-MEDIUM-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
812+
; RV32I-MEDIUM-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
813+
; RV32I-MEDIUM-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
814+
; RV32I-MEDIUM-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
815+
; RV32I-MEDIUM-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
816+
; RV32I-MEDIUM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
817+
; RV32I-MEDIUM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
818+
; RV32I-MEDIUM-NEXT: mv s0, a4
819+
; RV32I-MEDIUM-NEXT: mv s1, a3
820+
; RV32I-MEDIUM-NEXT: mv s2, a2
821+
; RV32I-MEDIUM-NEXT: beqz a3, .LBB20_3
822+
; RV32I-MEDIUM-NEXT: # %bb.1: # %entry
823+
; RV32I-MEDIUM-NEXT: slti a1, s1, 0
824+
; RV32I-MEDIUM-NEXT: beqz a1, .LBB20_4
825+
; RV32I-MEDIUM-NEXT: .LBB20_2:
826+
; RV32I-MEDIUM-NEXT: li s3, 0
827+
; RV32I-MEDIUM-NEXT: li s4, 0
828+
; RV32I-MEDIUM-NEXT: j .LBB20_6
829+
; RV32I-MEDIUM-NEXT: .LBB20_3:
830+
; RV32I-MEDIUM-NEXT: seqz a1, s2
831+
; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_2
832+
; RV32I-MEDIUM-NEXT: .LBB20_4: # %for.body.lr.ph
833+
; RV32I-MEDIUM-NEXT: li s5, 0
834+
; RV32I-MEDIUM-NEXT: li s6, 0
835+
; RV32I-MEDIUM-NEXT: li s3, 0
836+
; RV32I-MEDIUM-NEXT: li s4, 0
837+
; RV32I-MEDIUM-NEXT: slli a0, a0, 4
838+
; RV32I-MEDIUM-NEXT: add a0, s0, a0
839+
; RV32I-MEDIUM-NEXT: addi s7, a0, 8
840+
; RV32I-MEDIUM-NEXT: .LBB20_5: # %for.body
841+
; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
842+
; RV32I-MEDIUM-NEXT: mv a0, s0
843+
; RV32I-MEDIUM-NEXT: call f@plt
844+
; RV32I-MEDIUM-NEXT: lw a0, 4(s7)
845+
; RV32I-MEDIUM-NEXT: lw a1, 0(s7)
846+
; RV32I-MEDIUM-NEXT: add a0, a0, s4
847+
; RV32I-MEDIUM-NEXT: add s3, a1, s3
848+
; RV32I-MEDIUM-NEXT: sltu s4, s3, a1
849+
; RV32I-MEDIUM-NEXT: addi s5, s5, 1
850+
; RV32I-MEDIUM-NEXT: seqz a1, s5
851+
; RV32I-MEDIUM-NEXT: add s6, s6, a1
852+
; RV32I-MEDIUM-NEXT: xor a1, s5, s2
853+
; RV32I-MEDIUM-NEXT: xor a2, s6, s1
854+
; RV32I-MEDIUM-NEXT: or a1, a1, a2
855+
; RV32I-MEDIUM-NEXT: add s4, a0, s4
856+
; RV32I-MEDIUM-NEXT: bnez a1, .LBB20_5
857+
; RV32I-MEDIUM-NEXT: .LBB20_6: # %for.cond.cleanup
858+
; RV32I-MEDIUM-NEXT: mv a0, s3
859+
; RV32I-MEDIUM-NEXT: mv a1, s4
860+
; RV32I-MEDIUM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
861+
; RV32I-MEDIUM-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
862+
; RV32I-MEDIUM-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
863+
; RV32I-MEDIUM-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
864+
; RV32I-MEDIUM-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
865+
; RV32I-MEDIUM-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
866+
; RV32I-MEDIUM-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
867+
; RV32I-MEDIUM-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
868+
; RV32I-MEDIUM-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
869+
; RV32I-MEDIUM-NEXT: addi sp, sp, 48
870+
; RV32I-MEDIUM-NEXT: ret
871+
;
872+
; RV64I-LABEL: fold_addi_from_different_bb:
873+
; RV64I: # %bb.0: # %entry
874+
; RV64I-NEXT: addi sp, sp, -48
875+
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
876+
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
877+
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
878+
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
879+
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
880+
; RV64I-NEXT: blez a1, .LBB20_3
881+
; RV64I-NEXT: # %bb.1: # %for.body.lr.ph
882+
; RV64I-NEXT: mv s0, a2
883+
; RV64I-NEXT: mv s1, a1
884+
; RV64I-NEXT: li s2, 0
885+
; RV64I-NEXT: slli a0, a0, 4
886+
; RV64I-NEXT: add a0, a2, a0
887+
; RV64I-NEXT: addi s3, a0, 8
888+
; RV64I-NEXT: .LBB20_2: # %for.body
889+
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
890+
; RV64I-NEXT: mv a0, s0
891+
; RV64I-NEXT: call f@plt
892+
; RV64I-NEXT: ld a0, 0(s3)
893+
; RV64I-NEXT: addi s1, s1, -1
894+
; RV64I-NEXT: add s2, a0, s2
895+
; RV64I-NEXT: bnez s1, .LBB20_2
896+
; RV64I-NEXT: j .LBB20_4
897+
; RV64I-NEXT: .LBB20_3:
898+
; RV64I-NEXT: li s2, 0
899+
; RV64I-NEXT: .LBB20_4: # %for.cond.cleanup
900+
; RV64I-NEXT: mv a0, s2
901+
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
902+
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
903+
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
904+
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
905+
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
906+
; RV64I-NEXT: addi sp, sp, 48
907+
; RV64I-NEXT: ret
908+
;
909+
; RV64I-MEDIUM-LABEL: fold_addi_from_different_bb:
910+
; RV64I-MEDIUM: # %bb.0: # %entry
911+
; RV64I-MEDIUM-NEXT: addi sp, sp, -48
912+
; RV64I-MEDIUM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
913+
; RV64I-MEDIUM-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
914+
; RV64I-MEDIUM-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
915+
; RV64I-MEDIUM-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
916+
; RV64I-MEDIUM-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
917+
; RV64I-MEDIUM-NEXT: blez a1, .LBB20_3
918+
; RV64I-MEDIUM-NEXT: # %bb.1: # %for.body.lr.ph
919+
; RV64I-MEDIUM-NEXT: mv s0, a2
920+
; RV64I-MEDIUM-NEXT: mv s1, a1
921+
; RV64I-MEDIUM-NEXT: li s2, 0
922+
; RV64I-MEDIUM-NEXT: slli a0, a0, 4
923+
; RV64I-MEDIUM-NEXT: add a0, a2, a0
924+
; RV64I-MEDIUM-NEXT: addi s3, a0, 8
925+
; RV64I-MEDIUM-NEXT: .LBB20_2: # %for.body
926+
; RV64I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1
927+
; RV64I-MEDIUM-NEXT: mv a0, s0
928+
; RV64I-MEDIUM-NEXT: call f@plt
929+
; RV64I-MEDIUM-NEXT: ld a0, 0(s3)
930+
; RV64I-MEDIUM-NEXT: addi s1, s1, -1
931+
; RV64I-MEDIUM-NEXT: add s2, a0, s2
932+
; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2
933+
; RV64I-MEDIUM-NEXT: j .LBB20_4
934+
; RV64I-MEDIUM-NEXT: .LBB20_3:
935+
; RV64I-MEDIUM-NEXT: li s2, 0
936+
; RV64I-MEDIUM-NEXT: .LBB20_4: # %for.cond.cleanup
937+
; RV64I-MEDIUM-NEXT: mv a0, s2
938+
; RV64I-MEDIUM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
939+
; RV64I-MEDIUM-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
940+
; RV64I-MEDIUM-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
941+
; RV64I-MEDIUM-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
942+
; RV64I-MEDIUM-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
943+
; RV64I-MEDIUM-NEXT: addi sp, sp, 48
944+
; RV64I-MEDIUM-NEXT: ret
945+
entry:
946+
; Enter the loop preheader only when %n is strictly positive (signed).
%cmp4 = icmp sgt i64 %n, 0
947+
br i1 %cmp4, label %for.body.lr.ph, label %for.cond.cleanup
948+
949+
for.body.lr.ph: ; preds = %entry
950+
; TODO: when this GEP is expanded, the resulting `addi` should be folded
951+
; into the load in the loop body.
952+
; %y points at field 1 of element %k of the %struct.S array at %a.
%y = getelementptr inbounds %struct.S, ptr %a, i64 %k, i32 1
953+
br label %for.body
954+
955+
for.cond.cleanup: ; preds = %for.body, %entry
956+
; Result is 0 when arriving straight from %entry, otherwise the final sum.
%s.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ]
957+
ret i64 %s.0.lcssa
958+
959+
for.body: ; preds = %for.body.lr.ph, %for.body
960+
; %i.06 is the iteration counter and %s.05 the running sum; both start at 0.
%i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
961+
%s.05 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
962+
call void @f(ptr %a)
963+
; The load uses the address %y that was computed in %for.body.lr.ph.
%0 = load i64, ptr %y, align 8
964+
%add = add nsw i64 %0, %s.05
965+
%inc = add nuw nsw i64 %i.06, 1
966+
; Leave the loop once the incremented counter equals %n.
%exitcond.not = icmp eq i64 %inc, %n
967+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
968+
}
969+
970+
; External function taking a single pointer; it has no body in this file and
; is called once per loop iteration by fold_addi_from_different_bb.
declare void @f(ptr)

0 commit comments

Comments
 (0)