Skip to content

[XeVM] Add first integration tests #425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
# Copyright (C) 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,7 +45,8 @@ option(GC_ENABLE_TEST_DNNL_API "Build the dnnl tests" ${GC_ENABLE_DNNL_API})
option(GC_ENABLE_TEST_MLIR "Build the mlir tests" ON)
option(GC_ENABLE_TOOLS "Build the tools" ON)
option(GC_ENABLE_OPT "Build gc-opt" ${GC_ENABLE_TOOLS})
option(GC_ENABLE_IMEX "Enable Intel® Extension for MLIR" OFF)
option(GC_ENABLE_IMEX "Enable Intel® Extension for MLIR (implicitly enables GPU compilation)" OFF)
option(GC_ENABLE_GPU "Enable GPU runtime and tools components" OFF)
option(GC_ENABLE_BINDINGS_PYTHON "Enable Graph Compiler Python Binding" ON)
option(GC_DEV_LINK_LLVM_DYLIB "Link dynamic libraries of LLVM and MLIR. For developers only. Do not use it in packing the library." OFF)
option(GC_ENABLE_RUNTIME_NAIVE_BRGEMM "Use naive BRGEMM as runtime backend for debug purpose." OFF)
Expand All @@ -55,6 +56,10 @@ if(GC_ENABLE_LEGACY)
add_subdirectory(legacy/core)
endif()

# Normalize any truthy spelling of GC_ENABLE_GPU (1, YES, TRUE, ...) to the
# canonical value ON.
# NOTE(review): presumably needed for the lit config, like the GC_ENABLE_IMEX
# normalization that follows — confirm.
if (GC_ENABLE_GPU)
set(GC_ENABLE_GPU ON)
endif()

if (GC_ENABLE_IMEX)
# normalize the value for lit config
set(GC_ENABLE_IMEX ON)
Expand All @@ -70,6 +75,9 @@ endif()
############################## Targets #########################################
# All common options, includes etc. are added to this interface target.
add_library(GcInterface INTERFACE)
# Define GC_USE_GPU for every target that links GcInterface when the GPU
# components are enabled. target_compile_definitions is used instead of a raw
# -D compile option so the macro is tracked in the COMPILE_DEFINITIONS
# properties and emitted with the correct flag syntax for the active compiler.
if (GC_ENABLE_GPU)
  target_compile_definitions(GcInterface INTERFACE GC_USE_GPU)
endif()
target_compile_features(GcInterface INTERFACE cxx_std_17)
target_include_directories(GcInterface INTERFACE
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,6 @@ Graph Compiler supports the following build-time options.
| GC_ENABLE_TEST | **ON**, OFF | Controls building the tests |
| GC_DEV_LINK_LLVM_DYLIB | ON, **OFF** | Controls dynamic link LLVM/MLIR libraries, mainly for developer |
| GC_ENABLE_BINDINGS_PYTHON | **ON**, OFF | Controls building the Python API |
| GC_ENABLE_IMEX | ON, **OFF** | Whether to enable the GPU components |
| GC_ENABLE_IMEX | ON, **OFF** | Whether to enable the IMEX components |
| GC_ENABLE_GPU | ON, **OFF** | Whether to enable the GPU tools and components |

1 change: 1 addition & 0 deletions include/gc/Conversion/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define GC_CONVERSION_PASSES_H

#include "gc/Conversion/XeVMToLLVM/XeVMToLLVM.h"
#include "mlir/Pass/Pass.h"

namespace mlir {

Expand Down
70 changes: 70 additions & 0 deletions include/gc/Dialect/LLVMIR/XeVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,76 @@ def XeVM_BlockPrefetch2dOp : XeVM_Op<"blockprefetch2d">,
let hasVerifier = 1;
}

// Element types allowed in XeVM matrix vectors: 8-, 16- and 32-bit integers
// (via the AnyI8/AnyI16/AnyI32 constraints) and the f32, f16 and bf16 float
// types. Used as the element constraint of the xevm.dpas operands and result.
def XeVM_MatrixElemType : AnyTypeOf<[AnyI8, AnyI16, AnyI32, F32, F16, BF16]>;

/// Enum attribute of the different precision types.
///
/// Each case lists the C++ enumerator name, its i32 value and the keyword used
/// in the textual IR. Note that the signed integer cases (S8, S4, S2) are
/// spelled `i8`, `i4`, `i2` in the assembly, while the unsigned cases use
/// `u8`, `u4`, `u2`. The attribute is used for the `pa`/`pb` precision
/// arguments of `xevm.dpas`.
def XeVM_PrecisionTypeAttr : I32EnumAttr<"PrecisionType",
"XeVM precision type",
[
I32EnumAttrCase<"UNUSED", 0, "unused">,
I32EnumAttrCase<"U8", 1, "u8">,
I32EnumAttrCase<"U4", 2, "u4">,
I32EnumAttrCase<"U2", 3, "u2">,
I32EnumAttrCase<"S8", 4, "i8">,
I32EnumAttrCase<"S4", 5, "i4">,
I32EnumAttrCase<"S2", 6, "i2">,
I32EnumAttrCase<"BF8", 7, "bf8">,
I32EnumAttrCase<"TF32", 8, "tf32">,
I32EnumAttrCase<"BF16", 9, "bf16">,
I32EnumAttrCase<"FP16", 10, "f16">
]> {
// The generated C++ enum is placed in the ::mlir::xevm namespace.
let cppNamespace = "::mlir::xevm";
}

// Matrix multiply-accumulate operation (`xevm.dpas`): D = C + A x B.
//
// Operands $c, $a and $b are rank-1 fixed vectors whose element type is drawn
// from XeVM_MatrixElemType; the result $d is a fixed vector over the same
// element-type set. $pa and $pb carry the precision of A and B, and $rc is the
// repeat count.
def XeVM_DPASOp : XeVM_Op<"dpas">,
    Results<(outs FixedVectorOf<[XeVM_MatrixElemType]>:$d)>,
    Arguments<(ins
      FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$c,
      FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$a,
      FixedVectorOfRankAndType<[1], [XeVM_MatrixElemType]>:$b,
      XeVM_PrecisionTypeAttr:$pa,
      XeVM_PrecisionTypeAttr:$pb,
      I32Attr:$rc
    )> {

  let summary = "Matrix multiply-add";

  let description = [{
    The `xevm.dpas` operation is a matrix multiplication plus accumulation:

      D = C + A x B

    where the A, B, C input matrices and the result D have shapes:
      D : MxN
      C : MxN
      A : MxK
      B : KxN

    Shape restrictions:
      M : must be 1, 2, 4, or 8
      N : fixed execution size, must be 16
      K : systolic_depth * OPS_PER_CHAN
          OPS_PER_CHAN
            1 : for TF32
            2 : for 16-bit precision (BF, HF)
            4 : for 8-bit precision (FP8, UB, B)
            8 : for less than 8-bit precision (U4/S4, U2/S2).

    If systolic_depth is 8, K would be 8, 16, 32, or 64 (based on OPS_PER_CHAN).
      $a, $b, $c, $d - matrix A, B, C, D, respectively
      $pa, $pb - precision of matrix A and B, respectively
      $rc - repeat count

    Further restrictions as well as more details can be found here:
    https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_matrix_multiply_accumulate.html
  }];

  // Custom syntax: operands, then `{pa = ..., pb = ..., rc = ...}`, then the
  // remaining attributes and the functional type of operands and results.
  let assemblyFormat = [{
    operands ` ` `{` `pa` `=` $pa `,` `pb` `=` $pb `,` `rc` `=` $rc `}` attr-dict `:` functional-type(operands, results)
  }];

  // The verifier is currently disabled for this op.
  // let hasVerifier = 1;
}

def XeVM_TargetAttr : XeVM_Attr<"XeVMTarget", "target"> {
let description = [{
Expand Down
2 changes: 1 addition & 1 deletion include/gc/ExecutionEngine/Driver/Driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace mlir {
class DialectRegistry;
namespace gc {

const DialectRegistry &initCompilerAndGetDialects();
DialectRegistry &initCompilerAndGetDialects();

// the pointers to XXXMemRefType
using GeneralMemrefPtr = void *;
Expand Down
5 changes: 4 additions & 1 deletion include/gc/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
if(GC_ENABLE_DNNL_API)
list(APPEND TABLEGEN_MACROS -DGC_HAS_ONEDNN_DIALECT)
endif()
# Add the GC_USE_GPU macro to the TableGen macro list when GPU support is
# enabled.
# NOTE(review): presumably this is what activates the `#ifdef GC_USE_GPU`
# sections of Passes.td — confirm how TABLEGEN_MACROS is consumed.
if(GC_ENABLE_GPU)
list(APPEND TABLEGEN_MACROS -DGC_USE_GPU)
endif()
if(GC_ENABLE_IMEX)
list(APPEND TABLEGEN_MACROS -DGC_USE_IMEX)
list(APPEND TABLEGEN_MACROS -DGC_USE_IMEX -DGC_USE_GPU)
endif()

set(LLVM_TARGET_DEFINITIONS Passes.td)
Expand Down
3 changes: 2 additions & 1 deletion include/gc/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ std::unique_ptr<Pass> createMergeAllocPass();
void populateFrontendPasses(mlir::OpPassManager &);
void populateCPUPipeline(mlir::OpPassManager &);

#ifdef GC_USE_IMEX
struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
Option<bool> isUsmArgs{
*this, "is-usm-args",
Expand All @@ -136,6 +135,8 @@ struct GPUPipelineOptions : PassPipelineOptions<GPUPipelineOptions> {
llvm::cl::init(false)};
};
void populateGPUPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
#ifdef GC_USE_IMEX
void populateIMEXPipeline(mlir::OpPassManager &, const GPUPipelineOptions &);
#endif

#define GEN_PASS_DECL
Expand Down
27 changes: 15 additions & 12 deletions include/gc/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,21 @@ def LinalgToXeGPU : Pass<"linalg-to-xegpu", "func::FuncOp"> {
"DPAS register block sizes MxNxK">,
];
}
#endif

#ifdef GC_USE_GPU
// Pass `gpu-to-gpuocl`, anchored on ModuleOp: rewrites GPU operations into
// GpuOclRuntime calls. The `call-finish` option (bool, default false) requests
// a finish() call after each kernel launch.
def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
let description = [{
Convert the gpu alloc, dealloc, memcpy and launch operations to GpuOclRuntime calls.
}];
let options = [
Option<"callFinish", "call-finish", "bool",
/*default=*/"false",
"Call finish() after each kernel launch.">
];
}
#endif // GC_USE_GPU

def AddContextArg : Pass<"add-ctx-arg", "func::FuncOp"> {
let summary = "Add a context argument.";
Expand All @@ -109,17 +124,6 @@ def AllocsToSLM : Pass<"allocs-to-slm", "func::FuncOp"> {
];
}

// Pass `gpu-to-gpuocl`, anchored on ModuleOp: rewrites GPU operations into
// GpuOclRuntime calls. The `call-finish` option (bool, default false) requests
// a finish() call after each kernel launch.
def GpuToGpuOcl : Pass<"gpu-to-gpuocl", "ModuleOp"> {
let summary = "Convert the GPU operations to GpuOclRuntime calls.";
let description = [{
Convert the gpu alloc, dealloc, memcpy and launch operations to GpuOclRuntime calls.
}];
let options = [
Option<"callFinish", "call-finish", "bool",
/*default=*/"false",
"Call finish() after each kernel launch.">
];
}

def GpuTilingAndFusion : Pass<"gpu-tiling", "func::FuncOp"> {
let summary = "GPU tiling and fusion path.";
Expand Down Expand Up @@ -185,7 +189,6 @@ def GpuXeVMAttachTarget: Pass<"xevm-attach-target", ""> {
];
}

#endif // GC_USE_IMEX

def IterativeTilingAndFusion : Pass<"iterative-tiling-and-fusion",
"func::FuncOp"> {
Expand Down
4 changes: 1 addition & 3 deletions lib/gc/CAPI/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ set(GC_ALL_LIBS
GcAnalysis
MLIRCPURuntimeTransforms)

if(GC_ENABLE_IMEX)
list(APPEND GC_ALL_LIBS GcGpuPasses)
endif()
list(APPEND GC_ALL_LIBS GcGpuPasses)

add_mlir_public_c_api_library(GcCAPI
Dialects.cpp
Expand Down
Loading
Loading