[DAGCombiner] Fix mayAlias not accounting for scalable MMOs with offsets (#90573)

lukel97 · web-flow · commit 5e03c0af4745 · 2024-04-30T20:20:40.000+08:00
In #70452 DAGCombiner::mayAlias was taught to handle scalable sizes, but when it checks via AA->isNoAlias it didn't take into account the case where the size is scalable but there was an offset too. For the fixed length case the offset was just accounted for by adding to the LocationSize, but for the scalable case there doesn't seem to be a way to represent both a scalable and fixed part in it. So this patch works around it by bailing if there is an offset. Fixes #90559
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -28113,7 +28113,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
 #endif
 
   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
-      Size0.hasValue() && Size1.hasValue()) {
+      Size0.hasValue() && Size1.hasValue() &&
+      // Can't represent a scalable size + fixed offset in LocationSize
+      (!Size0.isScalable() || SrcValOffset0 == 0) &&
+      (!Size1.isScalable() || SrcValOffset1 == 0)) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
     int64_t Overlap0 =
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
@@ -1,19 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
 
-; FIXME: The i32 load and store pair isn't dead and shouldn't be omitted.
+; This showcases a miscompile that was fixed in #90573:
+; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores.
+; - the load and store of q aliases the upper 128 bits store of p.
+; - The aliasing 128 bit store will be between the chain of the scalar
+;   load/store:
+;
+;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
+;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
+;
+;   t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ...
+;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
+;   t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ...
+;
+; Previously, the scalar load/store was incorrectly combined away:
+;
+;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
+;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
+;
+;   // MISSING
+;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
+;   // MISSING
+; See also pr83017.ll: This is the same code, but relies on vscale_range instead
+; of -riscv-v-vector-bits-max=128.
 define void @f(ptr %p) vscale_range(2,2) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs4r.v v8, (a0)
-; CHECK-NEXT:    addi a1, a0, 80
+; CHECK-NEXT:    lw a1, 84(a0)
+; CHECK-NEXT:    addi a2, a0, 80
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs1r.v v8, (a1)
-; CHECK-NEXT:    addi a0, a0, 64
-; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    vs1r.v v8, (a2)
+; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vs4r.v v12, (a0)
+; CHECK-NEXT:    addi a2, a0, 64
+; CHECK-NEXT:    vs1r.v v8, (a2)
+; CHECK-NEXT:    sw a1, 84(a0)
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i64 84
   %x = load i32, ptr %q