Open
Description
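Running `llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll` through Alive2 (with `opt-alive.sh`) shows that GVN performs an unsound transformation: after GVN, the source is more defined than the target. In the counterexample below, lane 3 of the vector load is poison; GVN replaces the two scalar loads with bits extracted from a bitcast of that vector to `i128`, and the bitcast result is poison as a whole, so lanes 1-3 of the returned vector become poison even though the source returns a fully defined value.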
Reproducer IR for running only GVN (e.g. `opt -passes=gvn`):
; Reproducer module: running only GVN on @ConvertVectors_ByRef produces the
; target function that Alive2 reports as unsound further down in this report.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64--"
; Not referenced by anything else in this reproducer.
%union.ElementWiseAccess = type { <4 x float> }
; Builds a <4 x float> from the 16 bytes at %0:
;   lane 0      <- lane 0 of a full vector load
;   lane 1      <- separate scalar load of float element 1
;   lanes 2, 3  <- one scalar load of float element 2, inserted twice
; Float element 3 is only ever read as part of the vector load (%2), never by a
; scalar load, and lane 3 of %2 does not reach the result.
define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) local_unnamed_addr #0 {
; Full-width load of all four lanes. In the Alive2 counterexample lane 3 of
; this value is poison.
%2 = load <4 x float>, ptr %0, align 16
; Keep only lane 0 of %2; lanes 1-3 are poison here and are overwritten below.
%3 = shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; Address of float element 1 (4 bytes past %0).
%4 = getelementptr inbounds [4 x float], ptr %0, i64 0, i64 1
; Scalar load: independent of whatever lane 3 holds in memory. This is the
; load that the GVN output replaces with bitcast/lshr 32/trunc of %2.
%5 = load float, ptr %4, align 4
%6 = insertelement <4 x float> %3, float %5, i64 1
; Address of float element 2 (8 bytes past %0).
%7 = getelementptr inbounds [4 x float], ptr %0, i64 0, i64 2
; Scalar load of element 2; the GVN output replaces it with
; bitcast/lshr 64/trunc of %2.
%8 = load float, ptr %7, align 8
%9 = insertelement <4 x float> %6, float %8, i64 2
; Lane 3 reuses the element-2 scalar (%8), so every lane of the result comes
; from lane 0 of %2 or from a scalar load.
%10 = insertelement <4 x float> %9, float %8, i64 3
ret <4 x float> %10
}
; Attribute group used by @ConvertVectors_ByRef.
attributes #0 = { "target-features"="+avx" }
; Not referenced by any function in this reproducer.
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) "target-features"="+avx" }
Alive2 report:
define <4 x float> @ConvertVectors_ByRef(ptr nonnull dereferenceable(16) noundef align(16) %0) noundef {
%1:
%2 = load <4 x float>, ptr nonnull dereferenceable(16) noundef align(16) %0, align 16
%3 = shufflevector <4 x float> %2, <4 x float> poison, 0, 4294967295, 4294967295, 4294967295
%4 = gep inbounds ptr nonnull dereferenceable(16) noundef align(16) %0, 16 x i64 0, 4 x i64 1
%5 = load float, ptr %4, align 4
%6 = insertelement <4 x float> %3, float %5, i64 1
%7 = gep inbounds ptr nonnull dereferenceable(16) noundef align(16) %0, 16 x i64 0, 4 x i64 2
%8 = load float, ptr %7, align 8
%9 = insertelement <4 x float> %6, float %8, i64 2
%10 = insertelement <4 x float> %9, float %8, i64 3
ret <4 x float> %10
}
=>
define <4 x float> @ConvertVectors_ByRef(ptr nonnull dereferenceable(16) noundef align(16) %0) noundef {
%1:
%2 = load <4 x float>, ptr nonnull dereferenceable(16) noundef align(16) %0, align 16
%3 = shufflevector <4 x float> %2, <4 x float> poison, 0, 4294967295, 4294967295, 4294967295
%5 = bitcast <4 x float> %2 to i128
%6 = lshr i128 %5, 32
%7 = trunc i128 %6 to i32
%8 = bitcast i32 %7 to float
%9 = insertelement <4 x float> %3, float %8, i64 1
%11 = lshr i128 %5, 64
%12 = trunc i128 %11 to i32
%13 = bitcast i32 %12 to float
%14 = insertelement <4 x float> %9, float %13, i64 2
%15 = insertelement <4 x float> %14, float %13, i64 3
ret <4 x float> %15
}
Transformation doesn't verify! (unsound)
ERROR: Source is more defined than target
Example:
ptr nonnull dereferenceable(16) noundef align(16) %0 = pointer(non-local, block_id=0, offset=33)
Source:
<4 x float> %2 = < #x00000000 (+0.0), #x00000000 (+0.0), #x00000000 (+0.0), poison >
<4 x float> %3 = < #x00000000 (+0.0), poison, poison, poison >
ptr %4 = pointer(non-local, block_id=0, offset=37)
float %5 = #x00000000 (+0.0)
<4 x float> %6 = < #x00000000 (+0.0), #x00000000 (+0.0), poison, poison >
ptr %7 = pointer(non-local, block_id=0, offset=41)
float %8 = #x00000000 (+0.0)
<4 x float> %9 = < #x00000000 (+0.0), #x00000000 (+0.0), #x00000000 (+0.0), poison >
<4 x float> %10 = < #x00000000 (+0.0), #x00000000 (+0.0), #x00000000 (+0.0), #x00000000 (+0.0) >
SOURCE MEMORY STATE
===================
NON-LOCAL BLOCKS:
Block 0 > size: 128 align: 2 alloc type: 0 address: 15
Target:
<4 x float> %2 = < #x00000000 (+0.0), #x00000000 (+0.0), #x00000000 (+0.0), poison >
<4 x float> %3 = < #x00000000 (+0.0), poison, poison, poison >
i128 %5 = poison
i128 %6 = poison
i32 %7 = poison
float %8 = poison
<4 x float> %9 = < #x00000000 (+0.0), poison, poison, poison >
i128 %11 = poison
i32 %12 = poison
float %13 = poison
<4 x float> %14 = < #x00000000 (+0.0), poison, poison, poison >
<4 x float> %15 = < #x00000000 (+0.0), poison, poison, poison >