Add Moore Threads GPU support and update GitHub workflow for MUSA build #3069

Merged · 6 commits · Apr 28, 2025
6 changes: 3 additions & 3 deletions .devops/main-cuda.Dockerfile
```diff
@@ -13,8 +13,6 @@ WORKDIR /app
 ARG CUDA_DOCKER_ARCH=all
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable cuBLAS
-ENV GGML_CUDA=1
 
 RUN apt-get update && \
     apt-get install -y build-essential libsdl2-dev wget cmake git \
@@ -25,7 +23,8 @@ ENV CUDA_MAIN_VERSION=12.3
 ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
 
 COPY .. .
-RUN make base.en
+# Enable cuBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_CUDA=1"
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 ENV CUDA_MAIN_VERSION=12.3
@@ -37,4 +36,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
```
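With `/app/build/bin` now on `PATH`, the built binaries can be invoked by name inside the container. A local smoke test of this image might look like the following (a sketch; the image tag is arbitrary):

```
docker build -t whisper.cpp:main-cuda -f .devops/main-cuda.Dockerfile .
docker run -it --rm --gpus all whisper.cpp:main-cuda "whisper-cli --help"
```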
29 changes: 29 additions & 0 deletions .devops/main-musa.Dockerfile
```diff
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG MUSA_VERSION=rc3.1.1
+# Target the MUSA build image
+ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the MUSA runtime image
+ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_MUSA_DEV_CONTAINER} AS build
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y build-essential libsdl2-dev wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY .. .
+# Enable muBLAS
+RUN make base.en CMAKE_ARGS="-DGGML_MUSA=1"
+
+FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get install -y curl ffmpeg wget cmake git \
+    && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
+ENTRYPOINT [ "bash", "-c" ]
```
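The MUSA image mirrors the CUDA one, swapping the base images and the `GGML_MUSA` flag. Building and trying it locally might look like this (a sketch; device passthrough depends on the host's Moore Threads container toolkit, so the run flags are illustrative):

```
docker build -t whisper.cpp:main-musa -f .devops/main-musa.Dockerfile .
# Exact device flags vary by host setup; --privileged is a blunt placeholder.
docker run -it --rm --privileged whisper.cpp:main-musa "whisper-cli --help"
```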
1 change: 1 addition & 0 deletions .devops/main.Dockerfile
```diff
@@ -16,4 +16,5 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 COPY --from=build /app /app
+ENV PATH=/app/build/bin:$PATH
 ENTRYPOINT [ "bash", "-c" ]
```
3 changes: 3 additions & 0 deletions .dockerignore
```diff
@@ -0,0 +1,3 @@
+build*/
+.github/
+.devops/
```
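These patterns keep local build trees, CI workflows, and the Dockerfiles themselves out of the Docker build context, which shrinks context uploads and avoids copying stale build artifacts into the image.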
1 change: 1 addition & 0 deletions .github/workflows/docker.yml
```diff
@@ -18,6 +18,7 @@ jobs:
       matrix:
         config:
           - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64" }
+          - { tag: "main-musa", dockerfile: ".devops/main-musa.Dockerfile", platform: "linux/amd64" }
          #TODO: the cuda image keeps failing - disable for now
          # https://github.com/ggerganov/whisper.cpp/actions/runs/11019444428/job/30602020339
          #- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
```
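The workflow's build steps are not part of this hunk. As a rough sketch of how a matrix entry like this is typically consumed (the step below is illustrative, not copied from this workflow):

```yaml
# Hypothetical step: builds each matrix.config entry with docker/build-push-action
- name: Build Docker image
  uses: docker/build-push-action@v6
  with:
    context: .
    file: ${{ matrix.config.dockerfile }}
    platforms: ${{ matrix.config.platform }}
    tags: ghcr.io/ggml-org/whisper.cpp:${{ matrix.config.tag }}
```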
4 changes: 2 additions & 2 deletions Makefile
```diff
@@ -4,7 +4,7 @@
 
 .PHONY: build
 build:
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 
 # download a few audio samples into folder "./samples":
@@ -41,7 +41,7 @@ samples:
 
 tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo:
 	bash ./models/download-ggml-model.sh $@
-	cmake -B build
+	cmake -B build $(CMAKE_ARGS)
 	cmake --build build --config Release
 	@echo ""
 	@echo "==============================================="
```
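The `$(CMAKE_ARGS)` pass-through is what the new Dockerfiles rely on to select a GGML backend at image-build time. For example, the MUSA image invokes:

```
make base.en CMAKE_ARGS="-DGGML_MUSA=1"
```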
25 changes: 23 additions & 2 deletions README.md
```diff
@@ -23,6 +23,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - [Efficient GPU support for NVIDIA](#nvidia-gpu-support)
 - [OpenVINO Support](#openvino-support)
 - [Ascend NPU Support](#ascend-npu-support)
+- [Moore Threads GPU Support](#moore-threads-gpu-support)
 - [C-style API](https://github.com/ggml-org/whisper.cpp/blob/master/include/whisper.h)
 
 Supported platforms:
```
````diff
@@ -381,6 +382,25 @@ Run the inference examples as usual, for example:
 - If you have trouble with your Ascend NPU device, please create an issue with the **[CANN]** prefix/tag.
 - If you run successfully with your Ascend NPU device, please help update the table `Verified devices`.
 
+## Moore Threads GPU support
+
+With Moore Threads cards, the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
+First, make sure you have installed `MUSA SDK rc3.1.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=rc3.1.1
+
+Now build `whisper.cpp` with MUSA support:
+
+```
+cmake -B build -DGGML_MUSA=1
+cmake --build build -j --config Release
+```
+
+Or specify the architecture for your Moore Threads GPU. For example, if you have an MTT S80 GPU, you can specify the architecture as follows:
+
+```
+cmake -B build -DGGML_MUSA=1 -DMUSA_ARCHITECTURES="21"
+cmake --build build -j --config Release
+```
+
 ## FFmpeg support (Linux only)
 
 If you want to support more audio formats (such as Opus and AAC), you can turn on the `WHISPER_FFMPEG` build flag to enable FFmpeg integration.
````
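Once built with `-DGGML_MUSA=1`, inference runs the same as with any other backend. A quick check (a sketch, assuming the `base.en` model has been downloaded into `models/`):

```
./build/bin/whisper-cli -m models/ggml-base.en.bin -f samples/jfk.wav
```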
````diff
@@ -425,6 +445,7 @@ We have two Docker images available for this project:
 
 1. `ghcr.io/ggml-org/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
 2. `ghcr.io/ggml-org/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
+3. `ghcr.io/ggml-org/whisper.cpp:main-musa`: Same as `main` but compiled with MUSA support. (platforms: `linux/amd64`)
 
 ### Usage
 
@@ -437,11 +458,11 @@ docker run -it --rm \
 docker run -it --rm \
   -v path/to/models:/models \
   -v path/to/audios:/audios \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f /audios/jfk.wav"
 # transcribe an audio file in samples folder
 docker run -it --rm \
   -v path/to/models:/models \
-  whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
+  whisper.cpp:main "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
 ```
 
 ## Installing with Conan
````
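For the new `main-musa` image, the same usage pattern should carry over (a sketch; device-passthrough flags depend on the host's Moore Threads container toolkit and are omitted here):

```
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggml-org/whisper.cpp:main-musa "whisper-cli -m /models/ggml-base.bin -f ./samples/jfk.wav"
```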