-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[mlir][SME] Update E2E test to show potential optimisation (NFC) #107585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-linalg Author: Hugo Trachino (nujaa) ChangesIntroduces loop hoisting to ARM SME E2E tests to allow the hoisting of the tile load offering very important speedup. Discussed here : https://discourse.llvm.org/t/mlir-for-arm-sme-reducing-tile-data-transfers/80065/2 Full diff: https://github.com/llvm/llvm-project/pull/107585.diff 2 Files Affected:
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
index a57348a543c3cf..886211b65efa2d 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
@@ -82,8 +82,16 @@ module attributes {transform.with_named_sequence} {
transform.apply_patterns to %func {
transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
transform.apply_patterns.vector.lower_masks
+ transform.apply_patterns.canonicalization
} : !transform.any_op
+ // Step 5: Hoist load of accumulator.
+ %func_h = transform.structured.hoist_redundant_vector_transfers %func
+ : (!transform.any_op) -> !transform.any_op
+ %all_loops = transform.structured.match interface{LoopLikeInterface} in %module
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_licm to %all_loops : !transform.any_op
+ transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
index 79c9fcac70604b..4b6b9a9c746499 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
@@ -88,8 +88,16 @@ module attributes {transform.with_named_sequence} {
transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
transform.apply_patterns.vector.lower_masks
transform.apply_patterns.vector.rank_reducing_subview_patterns
+ transform.apply_patterns.canonicalization
} : !transform.any_op
+ // Step 6: Hoist load of accumulator.
+ %func_h = transform.structured.hoist_redundant_vector_transfers %func
+ : (!transform.any_op) -> !transform.any_op
+ %all_loops = transform.structured.match interface{LoopLikeInterface} in %bufferize
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_licm to %all_loops : !transform.any_op
+ transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op
transform.yield
}
}
|
@llvm/pr-subscribers-mlir-sme Author: Hugo Trachino (nujaa) ChangesIntroduces loop hoisting to ARM SME E2E tests to allow the hoisting of the tile load offering very important speedup. Discussed here : https://discourse.llvm.org/t/mlir-for-arm-sme-reducing-tile-data-transfers/80065/2 Full diff: https://github.com/llvm/llvm-project/pull/107585.diff 2 Files Affected:
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
index a57348a543c3cf..886211b65efa2d 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
@@ -82,8 +82,16 @@ module attributes {transform.with_named_sequence} {
transform.apply_patterns to %func {
transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
transform.apply_patterns.vector.lower_masks
+ transform.apply_patterns.canonicalization
} : !transform.any_op
+ // Step 5: Hoist load of accumulator.
+ %func_h = transform.structured.hoist_redundant_vector_transfers %func
+ : (!transform.any_op) -> !transform.any_op
+ %all_loops = transform.structured.match interface{LoopLikeInterface} in %module
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_licm to %all_loops : !transform.any_op
+ transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op
transform.yield
}
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
index 79c9fcac70604b..4b6b9a9c746499 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir
@@ -88,8 +88,16 @@ module attributes {transform.with_named_sequence} {
transform.apply_patterns.vector.lower_contraction lowering_strategy = "outerproduct"
transform.apply_patterns.vector.lower_masks
transform.apply_patterns.vector.rank_reducing_subview_patterns
+ transform.apply_patterns.canonicalization
} : !transform.any_op
+ // Step 6: Hoist load of accumulator.
+ %func_h = transform.structured.hoist_redundant_vector_transfers %func
+ : (!transform.any_op) -> !transform.any_op
+ %all_loops = transform.structured.match interface{LoopLikeInterface} in %bufferize
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_licm to %all_loops : !transform.any_op
+ transform.loop.hoist_loop_invariant_subsets %all_loops : !transform.any_op
transform.yield
}
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Hugo, LGTM!
Could you add a note that the additional step is not required for functional correctness and that instead it's an optimisation? This is obvious today, but our future selves might forget ;-) Thanks!
} : !transform.any_op | ||
|
||
// Step 5: Hoist load of accumulator. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: It's both the load and store of the accumulator that's hoisted.
Typo optionnal -> optional (also maybe say optimization rather than optional) |
Introduces loop hoisting to ARM SME E2E tests to allow the hoisting of the tile load offering very important speedup.
Discussed here : https://discourse.llvm.org/t/mlir-for-arm-sme-reducing-tile-data-transfers/80065/2