-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[LAA] Use MaxStride instead of CommonStride to calculate MaxVF #98142
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
1763f46
[LAA] Add test for loop with different strides
mrdaybird 98e2348
[LAA] Use MaxStride instead of CommonStride to get Max safe VF
mrdaybird 5f8f703
[LAA] Update comment, update prev test and add new test
mrdaybird d7668a7
[LAA] Update test
mrdaybird 60eb82b
Merge branch 'main' into different-strides
fhahn e6bb89c
Apply suggestions from code review
fhahn 1c6a594
Apply suggestions from code review
fhahn File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
156 changes: 156 additions & 0 deletions
156
llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: opt -passes="print<access-info>" %s 2>&1 | FileCheck %s | ||
|
||
; Global array of 65536 floats; both test functions below load from and store to it. | ||
@a = dso_local local_unnamed_addr global [65536 x float] zeroinitializer, align 16 | ||
|
||
; Generated from the following C code: | ||
; #define LEN 256 * 256 | ||
; float a[LEN]; | ||
; | ||
; void different_strides() { | ||
; for (int i = 0; i < LEN - 1024 - 255; i++) { | ||
; #pragma clang loop interleave(disable) | ||
; #pragma clang loop unroll(disable) | ||
; for (int j = 0; j < 256; j++) | ||
; a[i + j + 1024] += a[j * 4 + i]; | ||
; } | ||
; } | ||
; The load and store have different strides (16 and 4 bytes respectively), but the store | ||
; is always at a safe positive distance away from the load, thus BackwardVectorizable. | ||
define void @different_strides_backward_vectorizable() { | ||
; CHECK-LABEL: 'different_strides_backward_vectorizable' | ||
; CHECK-NEXT: inner.body: | ||
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits | ||
; CHECK-NEXT: Dependences: | ||
; CHECK-NEXT: BackwardVectorizable: | ||
; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 -> | ||
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4 | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Forward: | ||
; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 -> | ||
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4 | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Run-time memory checks: | ||
; CHECK-NEXT: Grouped accesses: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; CHECK-NEXT: SCEV assumptions: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Expressions re-written: | ||
; CHECK-NEXT: outer.header: | ||
; CHECK-NEXT: Report: loop is not the innermost loop | ||
; CHECK-NEXT: Dependences: | ||
; CHECK-NEXT: Run-time memory checks: | ||
; CHECK-NEXT: Grouped accesses: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; CHECK-NEXT: SCEV assumptions: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Expressions re-written: | ||
; | ||
; Two-level loop nest over @a: the outer loop steps %i from 0 by 1 and the inner loop | ||
; steps %j from 0 by 1. Only the inner loop is analysed for dependences; the CHECK lines | ||
; above expect the outer loop to be reported as "not the innermost loop". | ||
entry: | ||
br label %outer.header | ||
|
||
; Outer loop header. %0 = %i + 1024 does not change inside the inner loop and is the | ||
; base element index of the access through %arrayidx8. | ||
outer.header: | ||
%i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ] | ||
%0 = add nuw nsw i64 %i, 1024 | ||
br label %inner.body | ||
|
||
; Inner loop body, executed for %j = 0 .. 255 (exits when %j.next == 256). | ||
; %arrayidx addresses element 4 * %j + %i, so it advances 4 floats (16 bytes) per iteration. | ||
; %arrayidx8 addresses element %i + 1024 + %j, so it advances 1 float (4 bytes) per iteration. | ||
; The value loaded from %arrayidx is added to the value at %arrayidx8 and stored back there. | ||
inner.body: | ||
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ] | ||
%1 = shl nuw nsw i64 %j, 2 | ||
%2 = add nuw nsw i64 %1, %i | ||
%arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2 | ||
%3 = load float, ptr %arrayidx, align 4 | ||
%4 = add nuw nsw i64 %0, %j | ||
%arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4 | ||
%5 = load float, ptr %arrayidx8, align 4 | ||
%add9 = fadd fast float %5, %3 | ||
store float %add9, ptr %arrayidx8, align 4 | ||
%j.next = add nuw nsw i64 %j, 1 | ||
%exitcond.not = icmp eq i64 %j.next, 256 | ||
br i1 %exitcond.not, label %outer.latch, label %inner.body | ||
|
||
; Outer loop latch: advance %i and leave the nest once %i.next reaches 64257 | ||
; (65536 - 1024 - 255). | ||
outer.latch: | ||
%i.next = add nuw nsw i64 %i, 1 | ||
%outerexitcond.not = icmp eq i64 %i.next, 64257 | ||
br i1 %outerexitcond.not, label %exit, label %outer.header | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
|
||
; Generated from the following C code (with LEN = 256 and LEN2 = 65536, matching the | ||
; constants in the IR below): | ||
; void different_stride_and_not_vectorizable(){ | ||
; for(int i = 0; i < LEN2; i++){ | ||
; for(int j = 0 ; j < LEN; j++){ | ||
; a[i + j + LEN] += a[i + 4*j]; | ||
; } | ||
; } | ||
; } | ||
; The load and store have different strides, but the store and load are not at a | ||
; safe distance away from each other, thus not safe for vectorization. | ||
define void @different_stride_and_not_vectorizable() { | ||
; CHECK-LABEL: 'different_stride_and_not_vectorizable' | ||
; CHECK-NEXT: inner.body: | ||
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop | ||
; CHECK-NEXT: Unknown data dependence. | ||
; CHECK-NEXT: Dependences: | ||
; CHECK-NEXT: Unknown: | ||
; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 -> | ||
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4 | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Forward: | ||
; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 -> | ||
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4 | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Run-time memory checks: | ||
; CHECK-NEXT: Grouped accesses: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; CHECK-NEXT: SCEV assumptions: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Expressions re-written: | ||
; CHECK-NEXT: outer.header: | ||
; CHECK-NEXT: Report: loop is not the innermost loop | ||
; CHECK-NEXT: Dependences: | ||
; CHECK-NEXT: Run-time memory checks: | ||
; CHECK-NEXT: Grouped accesses: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. | ||
; CHECK-NEXT: SCEV assumptions: | ||
; CHECK-EMPTY: | ||
; CHECK-NEXT: Expressions re-written: | ||
; | ||
; Two-level loop nest over @a: the outer loop steps %i from 0 by 1 and the inner loop | ||
; steps %j from 0 by 1. The CHECK lines above expect an Unknown dependence between the | ||
; load through %arrayidx and the store through %arrayidx8 in the inner loop. | ||
entry: | ||
br label %outer.header | ||
|
||
; Outer loop header. %0 = %i + 256 does not change inside the inner loop and is the | ||
; base element index of the access through %arrayidx8. | ||
outer.header: | ||
%i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ] | ||
%0 = add nuw nsw i64 %i, 256 | ||
br label %inner.body | ||
|
||
; Inner loop body, executed for %j = 0 .. 255 (exits when %j.next == 256). | ||
; %arrayidx addresses element 4 * %j + %i, so it advances 4 floats (16 bytes) per iteration. | ||
; %arrayidx8 addresses element %i + 256 + %j, so it advances 1 float (4 bytes) per iteration. | ||
; The value loaded from %arrayidx is added to the value at %arrayidx8 and stored back there. | ||
inner.body: | ||
%j = phi i64 [ 0, %outer.header ], [ %j.next, %inner.body ] | ||
%1 = shl nuw nsw i64 %j, 2 | ||
%2 = add nuw nsw i64 %1, %i | ||
%arrayidx = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %2 | ||
%3 = load float, ptr %arrayidx, align 4 | ||
%4 = add nuw nsw i64 %0, %j | ||
%arrayidx8 = getelementptr inbounds [65536 x float], ptr @a, i64 0, i64 %4 | ||
%5 = load float, ptr %arrayidx8, align 4 | ||
%add9 = fadd fast float %5, %3 | ||
store float %add9, ptr %arrayidx8, align 4 | ||
%j.next = add nuw nsw i64 %j, 1 | ||
%exitcond.not = icmp eq i64 %j.next, 256 | ||
br i1 %exitcond.not, label %outer.latch, label %inner.body | ||
|
||
; Outer loop latch: advance %i and leave the nest once %i.next reaches 65536. | ||
outer.latch: | ||
%i.next = add nuw nsw i64 %i, 1 | ||
%exitcond29.not = icmp eq i64 %i.next, 65536 | ||
br i1 %exitcond29.not, label %exit, label %outer.header | ||
|
||
exit: | ||
ret void | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.