Skip to content

Improve configs - ObservabilityConfig #17453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 57 additions & 9 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from contextlib import contextmanager
from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
replace)
from functools import cached_property
from importlib.util import find_spec
from pathlib import Path
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
Expand All @@ -26,6 +27,7 @@
from typing_extensions import deprecated

import vllm.envs as envs
from vllm import version
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
Expand Down Expand Up @@ -3285,20 +3287,55 @@ def _extract_backend_options(self):
self.disable_additional_properties = True


DetailedTraceModules = Literal["model", "worker", "all"]


@config
@dataclass
class ObservabilityConfig:
"""Configuration for observability - metrics and tracing."""
show_hidden_metrics: bool = False

otlp_traces_endpoint: Optional[str] = None

# Collecting detailed timing information for each request can be expensive.

# If set, collects the model forward time for the request.
collect_model_forward_time: bool = False
show_hidden_metrics_for_version: Optional[str] = None
"""Enable deprecated Prometheus metrics that have been hidden since the
specified version. For example, if a previously deprecated metric has been
hidden since the v0.7.0 release, you use
`--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
you migrate to new metrics. The metric is likely to be removed completely
in an upcoming release."""

@cached_property
def show_hidden_metrics(self) -> bool:
    """Return whether deprecated (hidden) metrics should be exposed.

    Evaluates to `False` when `show_hidden_metrics_for_version` is unset.
    Otherwise the decision is delegated to
    `version._prev_minor_version_was`, called with the configured value.
    Being a `cached_property`, the result is computed on first access and
    then reused for this instance.
    """
    requested_version = self.show_hidden_metrics_for_version
    if requested_version is not None:
        return version._prev_minor_version_was(requested_version)
    # No version requested: hidden metrics stay hidden.
    return False

# If set, collects the model execute time for the request.
collect_model_execute_time: bool = False
otlp_traces_endpoint: Optional[str] = None
"""Target URL to which OpenTelemetry traces will be sent."""

collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
"""It makes sense to set this only if `--otlp-traces-endpoint` is set. If
set, it will collect detailed traces for the specified modules. This
involves use of possibly costly and or blocking operations and hence might
have a performance impact.

Note that collecting detailed timing information for each request can be
expensive."""

@cached_property
def collect_model_forward_time(self) -> bool:
    """Return whether model forward time is collected for the request.

    True only when `collect_detailed_traces` is set and lists either
    `"model"` or `"all"`.
    """
    modules = self.collect_detailed_traces
    if modules is None:
        # Detailed tracing was not requested at all.
        return False
    return "model" in modules or "all" in modules

@cached_property
def collect_model_execute_time(self) -> bool:
    """Return whether model execute time is collected for the request.

    True only when `collect_detailed_traces` is set and lists either
    `"worker"` or `"all"`.
    """
    modules = self.collect_detailed_traces
    if modules is None:
        # Detailed tracing was not requested at all.
        return False
    return "worker" in modules or "all" in modules

def compute_hash(self) -> str:
"""
Expand All @@ -3320,12 +3357,23 @@ def compute_hash(self) -> str:
return hash_str

def __post_init__(self):
    """Normalise `collect_detailed_traces` and validate the OTLP endpoint.

    When `collect_detailed_traces` holds exactly one entry that contains a
    comma, it is expanded via `_parse_collect_detailed_traces`.

    Raises:
        ValueError: if `otlp_traces_endpoint` is set while
            `is_otel_available()` reports that OpenTelemetry is missing.
    """
    traces = self.collect_detailed_traces
    is_single_csv_entry = (traces is not None and len(traces) == 1
                           and "," in traces[0])
    if is_single_csv_entry:
        self._parse_collect_detailed_traces()

    # Availability is probed first, exactly as before, then the endpoint.
    otel_missing = not is_otel_available()
    if otel_missing and self.otlp_traces_endpoint is not None:
        raise ValueError(
            "OpenTelemetry is not available. Unable to configure "
            "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
            f"installed. Original error:\n{otel_import_error_traceback}")

def _parse_collect_detailed_traces(self):
    """Expand a single comma-separated entry into one module name per item.

    Called from `__post_init__` when `collect_detailed_traces` holds exactly
    one string containing a comma (e.g. `["model,worker"]`); the attribute
    is rewritten in place as `["model", "worker"]`.

    Whitespace around each name is stripped and empty segments (such as the
    one left by a trailing comma) are dropped, so `"model, worker"` enables
    both modules instead of leaving an unmatched `" worker"` entry behind.
    """
    assert isinstance(self.collect_detailed_traces, list)
    # Only the first entry is split, matching the caller's one-entry check.
    segments = self.collect_detailed_traces[0].split(",")
    self.collect_detailed_traces = cast(
        list[DetailedTraceModules],
        [name.strip() for name in segments if name.strip()])


class KVTransferConfig(BaseModel):
"""Configuration for distributed KV cache transfer."""
Expand Down
96 changes: 39 additions & 57 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
import re
import threading
from dataclasses import MISSING, dataclass, fields
from itertools import permutations
from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
TypeVar, Union, cast, get_args, get_origin)

import torch
from typing_extensions import TypeIs, deprecated

import vllm.envs as envs
from vllm import version
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
ConfigFormat, ConfigType, DecodingConfig, Device,
DeviceConfig, DistributedExecutorBackend,
GuidedDecodingBackend, GuidedDecodingBackendV1,
HfOverrides, KVEventsConfig, KVTransferConfig,
LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
ModelDType, ModelImpl, MultiModalConfig,
ConfigFormat, ConfigType, DecodingConfig,
DetailedTraceModules, Device, DeviceConfig,
DistributedExecutorBackend, GuidedDecodingBackend,
GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
ObservabilityConfig, ParallelConfig, PoolerConfig,
PrefixCachingHashAlgo, PromptAdapterConfig,
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
Expand All @@ -41,8 +41,6 @@

logger = init_logger(__name__)

ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]

# object is used to allow for special typing forms
T = TypeVar("T")
TypeHint = Union[type[Any], object]
Expand Down Expand Up @@ -337,9 +335,12 @@ class EngineArgs:
speculative_config: Optional[Dict[str, Any]] = None

qlora_adapter_name_or_path: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = None
otlp_traces_endpoint: Optional[str] = None
collect_detailed_traces: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = \
ObservabilityConfig.show_hidden_metrics_for_version
otlp_traces_endpoint: Optional[str] = \
ObservabilityConfig.otlp_traces_endpoint
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
ObservabilityConfig.collect_detailed_traces
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
Expand Down Expand Up @@ -677,33 +678,29 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default=None,
help='Name or path of the QLoRA adapter.')

parser.add_argument('--show-hidden-metrics-for-version',
type=str,
default=None,
help='Enable deprecated Prometheus metrics that '
'have been hidden since the specified version. '
'For example, if a previously deprecated metric '
'has been hidden since the v0.7.0 release, you '
'use --show-hidden-metrics-for-version=0.7 as a '
'temporary escape hatch while you migrate to new '
'metrics. The metric is likely to be removed '
'completely in an upcoming release.')

parser.add_argument(
'--otlp-traces-endpoint',
type=str,
default=None,
help='Target URL to which OpenTelemetry traces will be sent.')
parser.add_argument(
'--collect-detailed-traces',
type=str,
default=None,
help="Valid choices are " +
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.")
# Observability arguments
observability_kwargs = get_kwargs(ObservabilityConfig)
observability_group = parser.add_argument_group(
title="ObservabilityConfig",
description=ObservabilityConfig.__doc__,
)
observability_group.add_argument(
"--show-hidden-metrics-for-version",
**observability_kwargs["show_hidden_metrics_for_version"])
observability_group.add_argument(
"--otlp-traces-endpoint",
**observability_kwargs["otlp_traces_endpoint"])
# TODO: generalise this special case
choices = observability_kwargs["collect_detailed_traces"]["choices"]
metavar = f"{{{','.join(choices)}}}"
observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
observability_kwargs["collect_detailed_traces"]["choices"] += [
",".join(p)
for p in permutations(get_args(DetailedTraceModules), r=2)
]
observability_group.add_argument(
"--collect-detailed-traces",
**observability_kwargs["collect_detailed_traces"])

# Scheduler arguments
scheduler_kwargs = get_kwargs(SchedulerConfig)
Expand Down Expand Up @@ -1094,26 +1091,11 @@ def create_engine_config(
if self.enable_reasoning else None,
)

show_hidden_metrics = False
if self.show_hidden_metrics_for_version is not None:
show_hidden_metrics = version._prev_minor_version_was(
self.show_hidden_metrics_for_version)

detailed_trace_modules = []
if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",")
for m in detailed_trace_modules:
if m not in ALLOWED_DETAILED_TRACE_MODULES:
raise ValueError(
f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
observability_config = ObservabilityConfig(
show_hidden_metrics=show_hidden_metrics,
show_hidden_metrics_for_version=self.
show_hidden_metrics_for_version,
otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules
or "all" in detailed_trace_modules,
collect_model_execute_time="worker" in detailed_trace_modules
or "all" in detailed_trace_modules,
collect_detailed_traces=self.collect_detailed_traces,
)

config = VllmConfig(
Expand Down