Your current environment
The output of `python collect_env.py` was not provided.
🐛 Describe the bug
The following traceback, showing that `flash_attn_interface` cannot be found, was observed after #17228 was merged:
Traceback (most recent call last):
  File "/mnt/vllm/benchmarks/./ds.py", line 3, in <module>
    llm = LLM(model="/mnt/model/DeepSeek-R1/DeepSeek-R1-UD-Q2_K_XL.gguf",
  File "/mnt/vllm/vllm/utils.py", line 1161, in inner
    return fn(*args, **kwargs)
  File "/mnt/vllm/vllm/entrypoints/llm.py", line 247, in __init__
    self.llm_engine = LLMEngine.from_engine_args(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 516, in from_engine_args
    return engine_cls.from_vllm_config(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 492, in from_vllm_config
    return cls(
  File "/mnt/vllm/vllm/engine/llm_engine.py", line 281, in __init__
    self.model_executor = executor_class(vllm_config=vllm_config, )
  File "/mnt/vllm/vllm/executor/executor_base.py", line 286, in __init__
    super().__init__(*args, **kwargs)
  File "/mnt/vllm/vllm/executor/executor_base.py", line 52, in __init__
    self._init_executor()
  File "/mnt/vllm/vllm/executor/mp_distributed_executor.py", line 123, in _init_executor
    self._run_workers("init_worker", all_kwargs)
  File "/mnt/vllm/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
    driver_worker_output = run_method(self.driver_worker, sent_method,
  File "/mnt/vllm/vllm/utils.py", line 2456, in run_method
    return func(*args, **kwargs)
  File "/mnt/vllm/vllm/worker/worker_base.py", line 594, in init_worker
    self.worker = worker_class(**kwargs)
  File "/mnt/vllm/vllm/worker/worker.py", line 82, in __init__
    self.model_runner: GPUModelRunnerBase = ModelRunnerClass(
  File "/mnt/vllm/vllm/worker/model_runner.py", line 1071, in __init__
    self.attn_backend = get_attn_backend(
  File "/mnt/vllm/vllm/attention/selector.py", line 95, in get_attn_backend
    return _cached_get_attn_backend(
  File "/mnt/vllm/vllm/attention/selector.py", line 148, in _cached_get_attn_backend
    attention_cls = current_platform.get_attn_backend_cls(
  File "/mnt/vllm/vllm/platforms/rocm.py", line 145, in get_attn_backend_cls
    from vllm.attention.backends.rocm_aiter_mla import (
  File "/mnt/vllm/vllm/attention/backends/rocm_aiter_mla.py", line 11, in <module>
    from vllm.attention.backends.mla.common import (MLACommonBackend,
  File "/mnt/vllm/vllm/attention/backends/mla/common.py", line 217, in <module>
    from vllm.vllm_flash_attn.fa_utils import get_flash_attn_version
  File "/mnt/vllm/vllm/vllm_flash_attn/__init__.py", line 11, in <module>
    from .flash_attn_interface import (fa_version_unsupported_reason,
ModuleNotFoundError: No module named 'vllm.vllm_flash_attn.flash_attn_interface'
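For reference, a minimal reproduction sketch based on the call site shown in the traceback; the exact arguments passed to `LLM(...)` in `ds.py` are truncated in the traceback, so everything beyond the model path is assumed:

```python
# Minimal reproduction sketch (assumed from the traceback); the remaining
# LLM(...) kwargs used in ds.py are truncated in the traceback and omitted here.
from vllm import LLM

# Constructing the engine triggers attention-backend selection, which on the
# ROCm platform imports vllm.vllm_flash_attn.flash_attn_interface and fails.
llm = LLM(model="/mnt/model/DeepSeek-R1/DeepSeek-R1-UD-Q2_K_XL.gguf")
```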
The error cannot be reproduced after rewinding to the previous commit dc2ceca.
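To confirm the regression independently of engine startup, one could check whether the module is importable at all. This is a hypothetical diagnostic, not part of the original report:

```python
# Hypothetical diagnostic: check whether the prebuilt flash-attention bindings
# shipped with vLLM are importable in this installation.
import importlib

try:
    importlib.import_module("vllm.vllm_flash_attn.flash_attn_interface")
    print("flash_attn_interface: importable")
except ModuleNotFoundError as exc:
    print(f"flash_attn_interface: missing ({exc})")
```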
Before submitting a new issue...
- Make sure you have already searched for relevant issues and asked the chatbot at the bottom right corner of the documentation page, which can answer many frequently asked questions.