-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Revert "[mlir] [XeGPU] Add XeGPU workgroup to subgroup pass (#139477)" #140779
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit 747620d.
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Jan Patrick Lehr (jplehr). Changes: This reverts commit 747620d due to multiple bot failures. Patch is 36.73 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/140779.diff. 6 Files Affected:
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
index 6f585f9ceb29b..3e81f2d0ed786 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+
#ifndef MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD
#define MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD
@@ -17,7 +18,9 @@ def XeGPUFoldAliasOps : Pass<"xegpu-fold-alias-ops"> {
The pass folds aliasing ops into XeGPU ops that they operate on the original
source references.
}];
- let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect"];
+ let dependentDialects = [
+ "memref::MemRefDialect", "xegpu::XeGPUDialect"
+ ];
}
def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
@@ -25,24 +28,14 @@ def XeGPUSubgroupDistribute : Pass<"xegpu-subgroup-distribute"> {
let description = [{
The pass distributes subgroup level (SIMD) XeGPU ops to work items.
}];
- let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
- "vector::VectorDialect"];
- let options = [Option<
- "printOnly", "print-analysis-only", "bool",
- /*default=*/"false",
- "Print the result of the subgroup map propagation analysis and exit.">];
-}
-
-def XeGPUWgToSgDistribute : Pass<"xegpu-wg-to-sg-distribute"> {
- let summary = "Transform WorkGroup level XeGPU code to SubGroup level";
- let description = [{
- This transform pass distributes the workgroup level computation to
- multiple subgroups based on the sg_layout and sg_data attributes.
- }];
-
- let dependentDialects = ["memref::MemRefDialect", "xegpu::XeGPUDialect",
- "vector::VectorDialect", "arith::ArithDialect",
- "gpu::GPUDialect", "index::IndexDialect"];
+ let dependentDialects = [
+ "memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"
+ ];
+ let options = [
+ Option<"printOnly", "print-analysis-only", "bool",
+ /*default=*/"false",
+ "Print the result of the subgroup map propagation analysis and exit.">
+ ];
}
#endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD
diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
index 44b81796b1313..559cc3ece62fb 100644
--- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Transforms.h
@@ -62,7 +62,6 @@ void populateXeGPUFoldAliasOpsPatterns(RewritePatternSet &patterns);
/// Appends patterns for XeGPU SIMT distribution into `patterns`.
void populateXeGPUSubgroupDistributePatterns(RewritePatternSet &patterns);
-void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns);
/// Collect a set of patterns to unroll xegpu operations to a smaller shapes.
/// Users can control whether an operation to be unrolled or not, as well as
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt b/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
index 837303b04e9d7..892eb791c46e7 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt
@@ -2,7 +2,6 @@ add_mlir_dialect_library(MLIRXeGPUTransforms
XeGPUFoldAliasOps.cpp
XeGPUSubgroupDistribute.cpp
XeGPUUnroll.cpp
- XeGPUWgToSgDistribute.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/XeGPU
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
deleted file mode 100644
index 3bf76af674ba0..0000000000000
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ /dev/null
@@ -1,378 +0,0 @@
-//===- XeGPUWgToSgDistribute.cpp - XeGPU Workgroup to Subgroup Pass -------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
-
-#include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Dialect/Arith/Utils/Utils.h"
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/Index/IR/IndexDialect.h"
-#include "mlir/Dialect/Index/IR/IndexOps.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/Utils/IndexingUtils.h"
-#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
-#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-namespace mlir {
-namespace xegpu {
-#define GEN_PASS_DEF_XEGPUWGTOSGDISTRIBUTE
-#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
-} // namespace xegpu
-} // namespace mlir
-
-using namespace mlir;
-
-namespace {
-
-/// This pattern transforms the CreateNdDescOp to create a subgroup descriptor
-/// from a workgroup descriptor. It replaces the offsets and sizes with
-/// appropriate values for the subgroup.
-/// It uses round-robin assignment to distribute the work to the subgroups.
-/// Following create_nd_desc operation:,
-/// %tdesc = xegpu.create_nd_tdesc %src[0, 0] : memref<24x24xf32>
-/// -> !xegpu.tensor_desc<24x24xf32, #xegpu.layout<sg_layout = [4, 4],
-/// sg_data = [2, 2], lane_layout = [2, 2], lane_data = [1, 1]>>
-/// is converted to 9 subgroup level operations based on the sg_layout &
-/// sg_data:
-/// %tdesc = xegpu.create_nd_tdesc %src[off1, off2] : memref<24x24xf32> ->
-/// !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2],
-/// lane_data = [1, 1]>>
-///
-/// The sg_layout and sg_data attributes are dropped after the pass as they are
-/// no longer needed.
-///
-/// 24x24 matrix distribution example:
-/// sg_layout = [4, 4], sg_data = [2, 2]
-/// Each 8x8 matrix within the 24x24 matrix is called a distribution unit.
-/// dist_unit_shape = [8, 8] --> sg_layout[i] * sg_data[i]
-///
-/// +------------------------+
-/// | 8x8 | 8x8 | 8x8 | <- 3 tiles across
-/// |-----+-----+-----|
-/// | 8x8 | 8x8 | 8x8 | <- 3 tiles down
-/// |-----+-----+-----|
-/// | 8x8 | 8x8 | 8x8 |
-/// +------------------------+
-///
-/// Each 8x8 tile is further subdivided among subgroups:
-/// +------------------------+
-/// | 2x2 2x2 2x2 2x2 | <- 4 subgroups across (each handles 2 columns)
-/// | 2x2 2x2 2x2 2x2 | <- 4 subgroups down (each handles 2 rows)
-/// | 2x2 2x2 2x2 2x2 |
-/// | 2x2 2x2 2x2 2x2 |
-/// +------------------------+
-///
-/// Since the 24x24 matrix is divided into 8x8 distribution units, there will be
-/// 9 distribution units (3x3) in total. Hence the 9 subgroup level operations.
-
-/// The pass currently has entire distribution logic in the WgToSgCreateNdOp
-/// pattern and all the other ops just follow.
-/// TODO: Decouple the distribution logic from WgToSgCreateNdOp for all the
-/// ops in the pass.
-struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
- using OpConversionPattern<xegpu::CreateNdDescOp>::OpConversionPattern;
-
- // Calculate offset for each subgroup
- SmallVector<OpFoldResult>
- calculateGlobalOffsets(ConversionPatternRewriter &rewriter, Location loc,
- const SmallVector<OpFoldResult> &originalOffsets,
- const SmallVector<Value> &localOffset,
- const SmallVector<int64_t> &distUnitBaseAddr,
- const SmallVector<int64_t> &distUnitShape) const {
- assert(localOffset.size() == distUnitBaseAddr.size() &&
- "localOffset and distUnitBaseAddr must have the same rank");
-
- SmallVector<OpFoldResult> globalOffsets(originalOffsets.begin(),
- originalOffsets.end());
- size_t rank = localOffset.size();
- for (size_t i = 0; i < rank; ++i) {
- size_t dimIdx = originalOffsets.size() - rank + i;
- Value constOffset =
- rewriter.create<arith::ConstantIndexOp>(loc, distUnitBaseAddr[i]);
- Value offset =
- rewriter.createOrFold<index::AddOp>(loc, localOffset[i], constOffset);
- Value modValue =
- rewriter.create<arith::ConstantIndexOp>(loc, distUnitShape[i]);
- Value offsetMod =
- rewriter.createOrFold<index::RemUOp>(loc, offset, modValue);
- Value origOffset = getValueOrCreateConstantIndexOp(
- rewriter, loc, originalOffsets[dimIdx]);
- Value globalOffset =
- rewriter.createOrFold<index::AddOp>(loc, origOffset, offsetMod);
- globalOffsets[dimIdx] = globalOffset;
- }
-
- return globalOffsets;
- }
-
- LogicalResult
- matchAndRewrite(xegpu::CreateNdDescOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- Location loc = op.getLoc();
- MLIRContext *ctx = op.getContext();
- xegpu::TensorDescType tdescTy = op.getType();
- auto layout = dyn_cast<xegpu::LayoutAttr>(tdescTy.getLayout());
- if (!layout)
- return failure();
- Type elemTy = tdescTy.getElementType();
- ArrayRef<int64_t> wgShape = tdescTy.getShape();
- // sgLayout must be present for workgroup-level distribution.
- SmallVector<int64_t> sgLayout;
- if (auto sgLayoutAttr = layout.getSgLayout())
- sgLayout = llvm::to_vector_of<int64_t>(sgLayoutAttr.asArrayRef());
- else
- return rewriter.notifyMatchFailure(
- op, "sgLayout attribute is required in layout");
-
- SmallVector<int64_t> sgShape;
- if (auto sgDataAttr = layout.getSgData()) {
- sgShape = llvm::to_vector_of<int64_t>(sgDataAttr.asArrayRef());
- } else {
- assert(wgShape.size() == sgLayout.size() &&
- "sgLayout and wgShape must have the same rank");
- sgShape.reserve(wgShape.size());
- for (size_t i = 0; i < wgShape.size(); ++i) {
- assert(sgLayout[i] != 0 && "sgLayout elements must be non-zero");
- sgShape.push_back(wgShape[i] / sgLayout[i]);
- }
- }
-
- // TODO : Handle order attribute
- // Get the subgroup ID
- auto linearSgId =
- rewriter.create<gpu::SubgroupIdOp>(loc, /*upper_bound=*/nullptr);
-
- // Create constants for layout dimensions
- SmallVector<Value> sgLayoutDim(sgLayout.size());
- SmallVector<Value> sgDataDim(sgShape.size());
-
- for (size_t i = 0; i < sgLayout.size(); i++) {
- sgLayoutDim[i] =
- rewriter.create<arith::ConstantIndexOp>(loc, sgLayout[i]);
- sgDataDim[i] = rewriter.create<arith::ConstantIndexOp>(loc, sgShape[i]);
- }
-
- auto deLinearizeSgId =
- affine::delinearizeIndex(rewriter, loc, linearSgId, sgLayoutDim);
- if (failed(deLinearizeSgId))
- return failure();
- SmallVector<Value> sgIds = *deLinearizeSgId;
-
- // Calculate distribution unit shape and local offsets for subgroup
- SmallVector<int64_t> distUnitShape(sgLayout.size());
- SmallVector<Value> localOffset(sgLayout.size());
- for (size_t i = 0; i < sgLayout.size(); i++) {
- distUnitShape[i] = std::min(sgLayout[i] * sgShape[i], wgShape[i]);
- localOffset[i] =
- rewriter.createOrFold<index::MulOp>(loc, sgIds[i], sgDataDim[i]);
- }
-
- SmallVector<OpFoldResult> originalOffsets = op.getMixedOffsets();
-
- xegpu::TensorDescType newTdescTy =
- xegpu::TensorDescType::get(ctx, sgShape, elemTy, tdescTy.getEncoding(),
- layout.dropSgLayoutAndData());
- SmallVector<Value> newCreateNdOps;
- for (SmallVector<int64_t> distUnitBaseAddr :
- StaticTileOffsetRange(wgShape, distUnitShape)) {
- SmallVector<OpFoldResult> globalOffsets =
- calculateGlobalOffsets(rewriter, loc, originalOffsets, localOffset,
- distUnitBaseAddr, distUnitShape);
-
- auto newCreateNdOp = rewriter.create<xegpu::CreateNdDescOp>(
- loc, newTdescTy, op.getSource(), globalOffsets, op.getMixedSizes(),
- op.getMixedStrides());
- newCreateNdOps.push_back(newCreateNdOp);
- }
-
- rewriter.replaceOpWithMultiple(op, {newCreateNdOps});
- return success();
- }
-};
-
-/// This pattern transforms the LoadNdOp to load subgroup data.
-struct WgToSgLoadNdOp : public OpConversionPattern<xegpu::LoadNdOp> {
- using OpConversionPattern<xegpu::LoadNdOp>::OpConversionPattern;
- LogicalResult
- matchAndRewrite(xegpu::LoadNdOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- SmallVector<Value> newLoadOps;
- for (auto src : adaptor.getTensorDesc()) {
- xegpu::TensorDescType tdescTy =
- dyn_cast<xegpu::TensorDescType>(src.getType());
- ArrayRef<int64_t> srcShape = tdescTy.getShape();
- VectorType newResTy = VectorType::get(srcShape, tdescTy.getElementType());
- auto newLoadOp = rewriter.create<xegpu::LoadNdOp>(op.getLoc(), newResTy,
- src, op->getAttrs());
- newLoadOps.push_back(newLoadOp);
- }
- rewriter.replaceOpWithMultiple(op, {newLoadOps});
- return mlir::success();
- }
-};
-
-/// This pattern transforms the StoreNdOp to store to a subgroup descriptor
-/// It creates a StoreNdOp op to store the updated values to the new subgroup
-/// src tensor descriptors.
-struct WgToSgStoreNdOp : public OpConversionPattern<xegpu::StoreNdOp> {
- using OpConversionPattern<xegpu::StoreNdOp>::OpConversionPattern;
- LogicalResult
- matchAndRewrite(xegpu::StoreNdOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- for (auto [v, t] : llvm::zip(adaptor.getValue(), adaptor.getTensorDesc()))
- rewriter.create<xegpu::StoreNdOp>(op.getLoc(), v, t, op.getL1HintAttr(),
- op.getL2HintAttr(), op.getL3HintAttr());
-
- rewriter.eraseOp(op);
- return success();
- }
-};
-
-/// This pattern transforms the UpdateNdOffsetOp to update the offsets of a
-/// subgroup descriptor. It creates an UpdateNdOffsetOp op to update the
-/// offsets of the new subgroup src tensor descriptors.
-struct WgToSgUpdateNdOffsetOp
- : public OpConversionPattern<xegpu::UpdateNdOffsetOp> {
- using OpConversionPattern<xegpu::UpdateNdOffsetOp>::OpConversionPattern;
- LogicalResult
- matchAndRewrite(xegpu::UpdateNdOffsetOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- llvm::SmallVector<Value> newUpdateTileOffsetOps;
- for (auto tDesc : adaptor.getTensorDesc()) {
- auto newUpdateTileOffsetOp = rewriter.create<xegpu::UpdateNdOffsetOp>(
- op.getLoc(), tDesc.getType(), tDesc, op.getOffsets(),
- op.getConstOffsets());
- newUpdateTileOffsetOps.push_back(newUpdateTileOffsetOp);
- }
-
- rewriter.replaceOpWithMultiple(op, {newUpdateTileOffsetOps});
- return success();
- }
-};
-
-/// This pattern transforms the DpasOp to work at subgroup level.
-struct WgToSgDpasOp : public OpConversionPattern<xegpu::DpasOp> {
- using OpConversionPattern<xegpu::DpasOp>::OpConversionPattern;
- LogicalResult
- matchAndRewrite(xegpu::DpasOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- Location loc = op.getLoc();
- VectorType resultTy = op.getResult().getType();
- if (resultTy.getRank() != 2)
- return failure();
-
- auto originalLayout =
- llvm::dyn_cast_or_null<xegpu::LayoutAttr>(op->getAttr("layout"));
- if (!originalLayout)
- return failure();
-
- SmallVector<Value> newDpasOps;
- size_t i = 0;
- for (auto aVec : adaptor.getLhs()) {
- for (auto bVec : adaptor.getRhs()) {
- llvm::SmallVector<Value> operands({aVec, bVec});
- Value tmpC;
- if (op.getAcc()) {
- tmpC = adaptor.getAcc()[i++];
- operands.push_back(tmpC);
- }
-
- ArrayRef<int64_t> aVecShape =
- llvm::cast<VectorType>(aVec.getType()).getShape();
- ArrayRef<int64_t> bVecShape =
- llvm::cast<VectorType>(bVec.getType()).getShape();
- VectorType resTy = VectorType::get({aVecShape[0], bVecShape[1]},
- resultTy.getElementType());
- tmpC = rewriter.create<xegpu::DpasOp>(
- loc, resTy, operands,
- llvm::ArrayRef<NamedAttribute>(
- {"layout_result_0", originalLayout.dropSgLayoutAndData()}));
- newDpasOps.push_back(tmpC);
- }
- }
- rewriter.replaceOpWithMultiple(op, {newDpasOps});
- return success();
- }
-};
-
-/// This pattern transforms the PrefetchNdOp to prefetch the subgroup data.
-struct WgToSgPrefetchNdOp : public OpConversionPattern<xegpu::PrefetchNdOp> {
- using OpConversionPattern<xegpu::PrefetchNdOp>::OpConversionPattern;
- LogicalResult
- matchAndRewrite(xegpu::PrefetchNdOp op, OneToNOpAdaptor adaptor,
- ConversionPatternRewriter &rewriter) const override {
- for (auto src : adaptor.getTensorDesc())
- rewriter.create<xegpu::PrefetchNdOp>(op.getLoc(), TypeRange(), src,
- op->getAttrs());
- rewriter.eraseOp(op);
- return success();
- }
-};
-
-} // namespace
-
-namespace mlir {
-namespace xegpu {
-void populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) {
- patterns.add<WgToSgCreateNdOp, WgToSgLoadNdOp, WgToSgStoreNdOp,
- WgToSgUpdateNdOffsetOp, WgToSgDpasOp, WgToSgPrefetchNdOp>(
- patterns.getContext());
-}
-} // namespace xegpu
-} // namespace mlir
-
-namespace {
-struct XeGPUWgToSgDistributePass
- : public xegpu::impl::XeGPUWgToSgDistributeBase<XeGPUWgToSgDistributePass> {
- void runOnOperation() override;
-};
-} // namespace
-
-void XeGPUWgToSgDistributePass::runOnOperation() {
- MLIRContext *ctx = &getContext();
- RewritePatternSet patterns(ctx);
- ConversionTarget target(*ctx);
-
- auto getTensorDescType = [](Operation *op) -> xegpu::TensorDescType {
- if (auto createOp = dyn_cast<xegpu::CreateNdDescOp>(op))
- return createOp.getType();
- if (auto loadOp = dyn_cast<xegpu::LoadNdOp>(op))
- return loadOp.getTensorDescType();
- if (auto storeOp = dyn_cast<xegpu::StoreNdOp>(op))
- return storeOp.getTensorDescType();
- if (auto updateOp = dyn_cast<xegpu::UpdateNdOffsetOp>(op))
- return updateOp.getType();
- if (auto prefetchOp = dyn_cast<xegpu::PrefetchNdOp>(op))
- return prefetchOp.getTensorDescType();
- return xegpu::TensorDescType();
- };
-
- auto isLegal = [&](xegpu::LayoutAttr layout) -> bool {
- return !layout || layout.getSgLayout() == nullptr;
- };
-
- target.addDynamicallyLegalOp<xegpu::CreateNdDescOp, xegpu::LoadNdOp,
- xegpu::StoreNdOp, xegpu::UpdateNdOffsetOp,
- xegpu::PrefetchNdOp>([=](Operation *op) -> bool {
- auto tdescTy = getTensorDescType(op);
- auto layout = dyn_cast_or_null<xegpu::LayoutAttr>(tdescTy.getLayout());
- return isLegal(layout);
- });
-
- target.addDynamicallyLegalOp<xegpu::DpasOp>([=](xegpu::DpasOp op) -> bool {
- auto layout = dyn_cast_or_null<xegpu::LayoutAttr>(op->getAttr("layout"));
- return isLegal(layout);
- });
-
- target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
-
- xegpu::populateXeGPUWgToSgDistributePatterns(patterns);
- if (failed(
- applyPartialConversion(getOperation(), target, std::move(patterns))))
- return signalPassFailure();
-}
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir
deleted file mode 100644
index bee026eb2084d..0000000000000
--- a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-rr.mlir
+++ /dev/null
@@ -1,105 +0,0 @@
-// RUN: mlir-opt --xegpu-wg-to-sg-distribute -split-input-file %s | FileCheck %s
-
-gpu.module @test_round_robin_assignment {
- // CHECK-LABEL: test_create_nd_tdesc
- // CHECK-SAME: %[[ARG_0:.*]]: memref<24x32xf32>
- gpu.func @test_create_nd_tdesc(%src: memref<24x32xf32>) {
- // CHECK-COUNT-12: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<24x32xf32>
- // CHECK-SAME: -> !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [2, 2], lane_data = [1, 1]>>
- // CHECK-NOT: xegpu.create_nd_tdesc
- %tdesc = xegpu.create_nd_t...
[truncated]
|
kostasalv pushed a commit to kostasalv/llvm-project that referenced this pull request on May 21, 2025: Revert "[mlir] [XeGPU] Add XeGPU workgroup to subgroup pass (#139477)" (llvm#140779). This reverts commit 747620d due to multiple bot failures.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This reverts commit 747620d due to multiple bot failures.