Skip to content

Commit 7f6defd

Browse files
committed
Merge remote-tracking branch 'origin/zhicong/deep_tile_matmul' into layout_propagation
2 parents 57e4f22 + 23dfa97 commit 7f6defd

40 files changed

+1210
-153
lines changed

.github/workflows/build-llvm.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,17 @@ name: LLVM Build
22

33
on:
44
workflow_dispatch:
5+
push:
6+
paths:
7+
- cmake/llvm-version.txt
8+
- .github/workflows/build-llvm.yml
59

610
permissions: read-all
711

812
jobs:
913
build:
1014
name: Build
11-
runs-on: [self-hosted, 0.0.1]
15+
runs-on: [self-hosted]
1216

1317
steps:
1418
- uses: actions/checkout@v4
@@ -27,7 +31,7 @@ jobs:
2731
python3 -m pip install -r mlir/python/requirements.txt
2832
mkdir llvm-install
2933
cmake -G Ninja llvm -B build -DCMAKE_INSTALL_PREFIX=llvm-install -DMLIR_ENABLE_BINDINGS_PYTHON=ON -DPython3_EXECUTABLE=$(which python3) \
30-
-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=true -DLLVM_ENABLE_PROJECTS="mlir" -DLLVM_TARGETS_TO_BUILD="X86" -DLLVM_INSTALL_UTILS=true -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DLLVM_INSTALL_GTEST=ON
34+
-DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_ASSERTIONS=true -DLLVM_ENABLE_PROJECTS="mlir" -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="SPIRV" -DLLVM_TARGETS_TO_BUILD="X86" -DLLVM_INSTALL_UTILS=true -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DLLVM_INSTALL_GTEST=ON
3135
cmake --build build --target install
3236
cd llvm-install
3337
tar -zcf ../llvm.tgz .

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,15 @@ cmake --build . --target gc-check
5858
Notes:
5959
* `/PATH/TO/llvm-project/llvm-install` should be the install path of LLVM. If you installed LLVM elsewhere by `-DCMAKE_INSTALL_PREFIX` option when building LLVM, you need to change the path in `-DMLIR_DIR` accordingly.
6060
* The cmake option `-DLLVM_EXTERNAL_LIT` is for the tests of this project. It requires the `lit` tool to be installed in the system. You can install it via `pip install lit`. If you don't need to run the tests of this repo, you can omit this option in the command line.
61-
* If GPU components are on (`-DGC_USE_GPU=ON`), make sure the Level-zero runtime is installed in your system. Either install Level-zero runtime via system package managers (e.g. `apt`), or follow the instructions of [IMEX](https://github.com/intel/mlir-extensions).
61+
62+
More notes if GPU components are on (`-DGC_USE_GPU=ON`):
63+
* make sure the OpenCL runtime is installed in your system. You can either
64+
install using OS-provided package (Ubuntu 22.04)
65+
```sh
66+
sudo apt install -y intel-opencl-icd opencl-c-headers
67+
```
68+
Or, download and install package from: https://github.com/intel/compute-runtime/releases
69+
* the LLVM codebase needs to be patched to support XeGPU lowering (from IMEX). Please follow instructions of [IMEX](https://github.com/intel/mlir-extensions) on patching LLVM.
6270

6371
Graph Compiler supports the following build-time options.
6472

cmake/imex.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ get_property(IMEX_INCLUDES GLOBAL PROPERTY IMEX_INCLUDES)
44
if (NOT DEFINED IMEX_INCLUDES)
55
include(functions)
66
set(IMEX_CHECK_LLVM_VERSION ON)
7-
set(IMEX_ENABLE_L0_RUNTIME 1)
7+
set(IMEX_ENABLE_L0_RUNTIME 0)
88
# TODO: Change to main https://github.com/oneapi-src/oneDNN.git when all the
99
# required functionality is merged.
1010
gc_fetch_content(imex 496b240093b5e132b60c5ee69878300fe69be300 https://github.com/Menooker/mlir-extensions
11-
CMAKE_ARGS "-DMLIR_DIR=${MLIR_DIR};-DIMEX_CHECK_LLVM_VERSION=ON;-DIMEX_ENABLE_L0_RUNTIME=1"
11+
CMAKE_ARGS "-DMLIR_DIR=${MLIR_DIR};-DIMEX_CHECK_LLVM_VERSION=ON;-DIMEX_ENABLE_L0_RUNTIME=0"
1212
)
1313

1414
set(IMEX_INCLUDES

cmake/llvm-version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
37661a17e26d9002ae9ade8c0de3932c22f16360
1+
89946bda5e1c7ceaf6d26634cc8c8c9498d9f7be

include/gc/Analysis/MatmulConfigAnalysis.h

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,63 +22,57 @@ using namespace mlir;
2222
struct SystemDesc {
2323
// get runtime OMP_NUM_THREADS
2424
uint32_t getNumThreads() {
25-
DataLayout layout = DataLayout(module);
26-
MLIRContext *ctx = module->getContext();
2725
std::optional<Attribute> numThreads = layout.getDevicePropertyValue(
2826
Builder(ctx).getStringAttr("CPU" /* device ID*/),
2927
Builder(ctx).getStringAttr("num_threads"));
30-
if (numThreads && dyn_cast<IntegerAttr>(*numThreads)) {
31-
return cast<IntegerAttr>(*numThreads).getInt();
28+
if (numThreads && isa<IntegerAttr>(*numThreads)) {
29+
return dyn_cast<IntegerAttr>(*numThreads).getInt();
3230
}
3331
return 1;
3432
}
3533
// get cache size by cacheLevel
3634
size_t getCacheSize(uint8_t cacheLevel) {
37-
DataLayout layout = DataLayout(module);
38-
MLIRContext *ctx = module->getContext();
39-
4035
if (cacheLevel == 1) {
4136
std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
4237
Builder(ctx).getStringAttr("CPU" /* device ID*/),
4338
Builder(ctx).getStringAttr("L1_cache_size_in_bytes"));
44-
if (cacheSize && dyn_cast<IntegerAttr>(*cacheSize)) {
45-
return cast<IntegerAttr>(*cacheSize).getInt();
39+
if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
40+
return dyn_cast<IntegerAttr>(*cacheSize).getInt();
4641
}
4742
} else if (cacheLevel == 2) {
4843
std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
4944
Builder(ctx).getStringAttr("CPU" /* device ID*/),
5045
Builder(ctx).getStringAttr("L2_cache_size_in_bytes"));
51-
if (cacheSize && dyn_cast<IntegerAttr>(*cacheSize)) {
52-
return cast<IntegerAttr>(*cacheSize).getInt();
46+
if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
47+
return dyn_cast<IntegerAttr>(*cacheSize).getInt();
5348
}
5449
} else if (cacheLevel == 3) {
5550
std::optional<Attribute> cacheSize = layout.getDevicePropertyValue(
5651
Builder(ctx).getStringAttr("CPU" /* device ID*/),
5752
Builder(ctx).getStringAttr("L3_cache_size_in_bytes"));
58-
if (cacheSize && dyn_cast<IntegerAttr>(*cacheSize)) {
59-
return cast<IntegerAttr>(*cacheSize).getInt();
53+
if (cacheSize && isa<IntegerAttr>(*cacheSize)) {
54+
return dyn_cast<IntegerAttr>(*cacheSize).getInt();
6055
}
6156
}
6257
return 0;
6358
}
6459

6560
// get the maximum vector length in bits
6661
size_t getMaxVectorLength() {
67-
DataLayout layout = DataLayout(module);
68-
MLIRContext *ctx = module->getContext();
6962
std::optional<Attribute> maxVectorLength = layout.getDevicePropertyValue(
7063
Builder(ctx).getStringAttr("CPU" /* device ID*/),
7164
Builder(ctx).getStringAttr("max_vector_width"));
72-
if (maxVectorLength && dyn_cast<IntegerAttr>(*maxVectorLength)) {
73-
return cast<IntegerAttr>(*maxVectorLength).getInt();
65+
if (maxVectorLength && isa<IntegerAttr>(*maxVectorLength)) {
66+
return dyn_cast<IntegerAttr>(*maxVectorLength).getInt();
7467
}
7568
return 512;
7669
}
7770

78-
SystemDesc(ModuleOp m) : module(m) {}
71+
SystemDesc(ModuleOp m) : layout(m), ctx(m->getContext()) {}
7972

8073
private:
81-
ModuleOp module;
74+
DataLayout layout;
75+
MLIRContext *ctx;
8276
};
8377

8478
// The configuration for matmul tiling

include/gc/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
add_subdirectory(Dialect)
2-
add_subdirectory(Transforms)
2+
add_subdirectory(Transforms)

include/gc/Dialect/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
add_subdirectory(CPURuntime)
22
add_subdirectory(OneDNNGraph)
33
add_subdirectory(Microkernel)
4-
add_subdirectory(Linalgx)
4+
add_subdirectory(Linalgx)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
set(LLVM_TARGET_DEFINITIONS MicrokernelEnum.td)
2+
mlir_tablegen(MicrokernelEnum.h.inc -gen-enum-decls)
3+
mlir_tablegen(MicrokernelEnum.cpp.inc -gen-enum-defs)
4+
add_public_tablegen_target(MLIRMicrokernelAttrDefIncGen)
5+
16
add_mlir_dialect(MicrokernelOps microkernel)
27
add_mlir_doc(MicrokernelOps MicrokernelOps gc/Dialect/Microkernel/ -gen-op-doc)
38
add_mlir_doc(MicrokernelDialect MicrokernelDialect gc/Dialect/Microkernel/ -gen-dialect-doc)

include/gc/Dialect/Microkernel/MicrokernelDialect.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define GC_DIALECTS_MICROKERNELDIALECT_H
1111

1212
#include "mlir/IR/Dialect.h"
13+
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
1314

1415
#include "gc/Dialect/Microkernel/MicrokernelOpsDialect.h.inc"
1516

include/gc/Dialect/Microkernel/MicrokernelDialect.td

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,20 @@ include "mlir/IR/OpBase.td"
1515
// Microkernel dialect definition.
1616
//===----------------------------------------------------------------------===//
1717

18-
def MicrokernelDialect : Dialect {
18+
def Microkernel_Dialect : Dialect {
1919
let name = "microkernel";
2020
let summary = "A dialect for microkernel abstraction.";
2121
let description = [{
22-
The dialect wraps the BRGEMM API to set up the HW context etc.
22+
This dialect contains wrappers for microkernel primitives like BRGEMM.
2323
}];
2424
let cppNamespace = "::mlir::microkernel";
25-
26-
let useDefaultTypePrinterParser = 1;
2725
}
2826

27+
//===----------------------------------------------------------------------===//
28+
// Base microkernel operation definition.
29+
//===----------------------------------------------------------------------===//
30+
31+
class Microkernel_Op<string mnemonic, list<Trait> traits = []> :
32+
Op<Microkernel_Dialect, mnemonic, traits>;
33+
2934
#endif // MICROKERNEL_DIALECT
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===- MicrokernelEnum.h - microkernel dialect enums ------------*- C++ -*-===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef GC_DIALECTS_MICROKERNELENUM_H
10+
#define GC_DIALECTS_MICROKERNELENUM_H
11+
12+
#include "mlir/IR/Attributes.h"
13+
#include "mlir/IR/DialectImplementation.h"
14+
15+
#define GET_ATTRDEF_CLASSES
16+
#include "gc/Dialect/Microkernel/MicrokernelEnum.h.inc"
17+
18+
#endif // GC_DIALECTS_MICROKERNELENUM_H
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
//===- MicrokernelEnum.td - microkernel dialect enum -------*- tablegen -*-===//
2+
//
3+
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef MICROKERNEL_ENUM
10+
#define MICROKERNEL_ENUM
11+
12+
include "mlir/IR/EnumAttr.td"
13+
include "gc/Dialect/Microkernel/MicrokernelDialect.td"
14+
15+
def Microkernel_BrgemmFlags : I64EnumAttr<
16+
"BrgemmFlags", "Flags for indicating optional behaviours of Brgemm",
17+
[
18+
I64EnumAttrCase<"NONE", 0, "none">,
19+
I64EnumAttrCase<"BETA_0", 1, "beta_0">,
20+
I64EnumAttrCase<"STRIDE", 2, "stride">,
21+
I64EnumAttrCase<"LIST", 4, "list">
22+
]> {
23+
let cppNamespace = "::mlir::microkernel";
24+
}
25+
26+
#endif // MICROKERNEL_ENUM

include/gc/Dialect/Microkernel/MicrokernelOps.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,16 @@
99
#ifndef GC_DIALECTS_MICROKERNELOPS_H
1010
#define GC_DIALECTS_MICROKERNELOPS_H
1111

12+
#include "mlir/Dialect/MemRef/IR/MemRef.h"
13+
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
14+
#include "mlir/Dialect/SCF/IR/SCF.h"
15+
#include "mlir/IR/BuiltinTypes.h"
16+
#include "mlir/IR/Dialect.h"
1217
#include "mlir/IR/OpDefinition.h"
18+
#include "mlir/Interfaces/SideEffectInterfaces.h"
19+
20+
#include "gc/Dialect/Microkernel/MicrokernelDialect.h"
21+
#include "gc/Dialect/Microkernel/MicrokernelEnum.h"
1322

1423
#define GET_OP_CLASSES
1524
#include "gc/Dialect/Microkernel/MicrokernelOps.h.inc"

include/gc/Dialect/Microkernel/MicrokernelOps.td

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,107 @@
1010
#define MICROKERNEL_OPS
1111

1212
include "MicrokernelDialect.td"
13+
include "gc/Dialect/Microkernel/MicrokernelEnum.td"
14+
include "mlir/Interfaces/SideEffectInterfaces.td"
1315

14-
#endif // MICROKERNEL_OPS
16+
class StaticMemRefRankOf<list<Type> allowedTypes, list<int> ranks> :
17+
Type<And<[MemRefOf<allowedTypes>.predicate,
18+
HasAnyRankOfPred<ranks>, HasStaticShapePred]>,
19+
!interleave(!foreach(rank, ranks, rank # "D"), "/") # " static " #
20+
MemRefOf<allowedTypes>.summary, "::mlir::MemRefType">;
21+
22+
def Microkernel_BrgemmDispatchOp : Microkernel_Op<"brgemm.dispatch", [Pure]> {
23+
let summary = "JIT the brgemm microkernel given the parameters";
24+
let description = [{
25+
The operation has the following arguments: 1) m, n, k, lda, ldb, ldc, stride_a and stride_b.
26+
Inputs is a dense attribute of I64 elements. 2) flags carry information on
27+
the different flags that can be used for brgemm like whether beta == 0 or strided batch. For
28+
more details, see: `Microkernel_BrgemmFlags`. 3) data_types of operand A & B.
29+
Output is the id of the JITed kernel.
30+
}];
31+
32+
let arguments = (ins
33+
ConfinedAttr<DenseI64ArrayAttr,
34+
[DenseArrayNonNegative<DenseI64ArrayAttr>]>:$inputs,
35+
TypedArrayAttrBase<Microkernel_BrgemmFlags, "brgemm flags">:$flags,
36+
TypedArrayAttrBase<TypeAttr, "brgemm dtypes">:$data_type);
37+
38+
let results = (outs I64:$results);
39+
let hasCustomAssemblyFormat = 1;
40+
let hasVerifier = 1;
41+
}
42+
43+
def Microkernel_BrgemmPrologueOp : Microkernel_Op<"brgemm.prologue"> {
44+
let summary = "Prologue before executing the JITed brgemm "
45+
"microkernel, and the context is considered core-level";
46+
let description = [{
47+
The operation has the following arguments: Input is the id of the JITed kernel.
48+
There is no output.
49+
}];
50+
51+
let arguments = (ins I64:$inputs);
52+
53+
let assemblyFormat = [{
54+
`(` $inputs `)`
55+
attr-dict `:` functional-type($inputs, results)
56+
}];
57+
}
58+
59+
def Microkernel_BrgemmEpilogueOp : Microkernel_Op<"brgemm.epilogue"> {
60+
let summary = "Epilogue after executing the JITed brgemm microkernel";
61+
let description = [{
62+
The operation has the following arguments: Input is the id of JITed kernel.
63+
There is no output.
64+
}];
65+
66+
let arguments = (ins I64:$inputs);
67+
68+
let assemblyFormat = [{
69+
`(` $inputs `)`
70+
attr-dict `:` functional-type($inputs, results)
71+
}];
72+
}
73+
74+
/* A generic input type of Microkernel_BrgemmOp, allowing for `BrgemmMemRef` and I64.
75+
* The `BrgemmMemRef` should be a static MemRef, and for each operand its shape should be:
76+
* Operand A: StaticMemRefRankOf<[F32, BF16, SI8, UI8], [3]>;
77+
* Operand B (non-VNNI): StaticMemRefRankOf<[F32], [3]>;
78+
* Operand B (VNNI): StaticMemRefRankOf<[BF16, SI8, UI8], [4]>;
79+
* Operand C: StaticMemRefRankOf<[F32, SI32], [2]>;
80+
*/
81+
def BrgemmMemRefOrI64 : AnyTypeOf<[StaticMemRefRankOf<[F32, BF16, SI32, SI8, UI8], [2, 3, 4]>, I64]>;
82+
83+
def Microkernel_BrgemmOp : Microkernel_Op<"brgemm"> {
84+
let summary = "execute the JITed brgemm kernel.";
85+
let description = [{
86+
The operation has the following arguments:
87+
1) For stride mode, id of JITed kernel, MemRef of operand A/B/C, and the batch size;
88+
2) For addr mode, plus the length of addr list at the end.
89+
There is no output.
90+
}];
91+
92+
let arguments = (ins Variadic<BrgemmMemRefOrI64>:$inputs);
93+
94+
let assemblyFormat = [{
95+
`(` $inputs `)`
96+
attr-dict `:` functional-type($inputs, results)
97+
}];
98+
99+
let extraClassDeclaration = [{
100+
Value getDispatch() { return getInputs()[0]; }
101+
102+
Value getOperandA() { return getInputs()[1]; }
103+
104+
Value getOperandB() { return getInputs()[2]; }
105+
106+
Value getOutput() { return getInputs()[3]; }
107+
108+
Value getBatch() { return getInputs()[4]; }
109+
110+
Value getAddrLen() { return getInputs()[5]; }
111+
}];
112+
113+
let hasVerifier = 1;
114+
}
115+
116+
#endif // MICROKERNEL_OPS

include/gc/Transforms/Passes.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def ConvertOneDNNGraphToLinalg : Pass<"convert-onednn-graph-to-linalg"> {
2121
];
2222
}
2323

24-
24+
#ifdef GC_USE_GPU
2525
def LinalgToXeGPU : Pass<"linalg-to-xegpu", "func::FuncOp"> {
2626
let summary = "Convert linalg dialect to XeGPU dialect.";
2727
let description = [{
@@ -46,6 +46,7 @@ def LinalgToXeGPU : Pass<"linalg-to-xegpu", "func::FuncOp"> {
4646
"DPAS register block sizes MxNxK">,
4747
];
4848
}
49+
#endif
4950

5051
def DeepTileContractionNamedOp
5152
: Pass<"deep-tile-contraction-named-op", "func::FuncOp"> {
@@ -73,7 +74,6 @@ def MergeNestedForall : Pass<"merge-nested-forall"> {
7374
let dependentDialects = ["scf::SCFDialect"];
7475
}
7576

76-
7777
def PropagateLayoutOnNamedOps : Pass<"propagate-layout-on-named-ops"> {
7878
let summary = "Insert and propagate tensor.pack to pack the computation of linalg named ops and tensor ops.";
7979
let description = [{

0 commit comments

Comments
 (0)