Skip to content

Commit a257d9b

Browse files
authored
Improve configs - ObservabilityConfig (#17453)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 015069b commit a257d9b

File tree

2 files changed

+96
-66
lines changed

2 files changed

+96
-66
lines changed

vllm/config.py

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from contextlib import contextmanager
1515
from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
1616
replace)
17+
from functools import cached_property
1718
from importlib.util import find_spec
1819
from pathlib import Path
1920
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
@@ -26,6 +27,7 @@
2627
from typing_extensions import deprecated
2728

2829
import vllm.envs as envs
30+
from vllm import version
2931
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
3032
from vllm.logger import init_logger
3133
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
@@ -3285,20 +3287,55 @@ def _extract_backend_options(self):
32853287
self.disable_additional_properties = True
32863288

32873289

3290+
DetailedTraceModules = Literal["model", "worker", "all"]
3291+
3292+
3293+
@config
32883294
@dataclass
32893295
class ObservabilityConfig:
32903296
"""Configuration for observability - metrics and tracing."""
3291-
show_hidden_metrics: bool = False
3292-
3293-
otlp_traces_endpoint: Optional[str] = None
32943297

3295-
# Collecting detailed timing information for each request can be expensive.
3296-
3297-
# If set, collects the model forward time for the request.
3298-
collect_model_forward_time: bool = False
3298+
show_hidden_metrics_for_version: Optional[str] = None
3299+
"""Enable deprecated Prometheus metrics that have been hidden since the
3300+
specified version. For example, if a previously deprecated metric has been
3301+
hidden since the v0.7.0 release, you use
3302+
`--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
3303+
you migrate to new metrics. The metric is likely to be removed completely
3304+
in an upcoming release."""
3305+
3306+
@cached_property
3307+
def show_hidden_metrics(self) -> bool:
3308+
"""Check if the hidden metrics should be shown."""
3309+
if self.show_hidden_metrics_for_version is None:
3310+
return False
3311+
return version._prev_minor_version_was(
3312+
self.show_hidden_metrics_for_version)
32993313

3300-
# If set, collects the model execute time for the request.
3301-
collect_model_execute_time: bool = False
3314+
otlp_traces_endpoint: Optional[str] = None
3315+
"""Target URL to which OpenTelemetry traces will be sent."""
3316+
3317+
collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
3318+
"""It makes sense to set this only if `--otlp-traces-endpoint` is set. If
3319+
set, it will collect detailed traces for the specified modules. This
3320+
involves use of possibly costly and/or blocking operations and hence might
3321+
have a performance impact.
3322+
3323+
Note that collecting detailed timing information for each request can be
3324+
expensive."""
3325+
3326+
@cached_property
3327+
def collect_model_forward_time(self) -> bool:
3328+
"""Whether to collect model forward time for the request."""
3329+
return (self.collect_detailed_traces is not None
3330+
and ("model" in self.collect_detailed_traces
3331+
or "all" in self.collect_detailed_traces))
3332+
3333+
@cached_property
3334+
def collect_model_execute_time(self) -> bool:
3335+
"""Whether to collect model execute time for the request."""
3336+
return (self.collect_detailed_traces is not None
3337+
and ("worker" in self.collect_detailed_traces
3338+
or "all" in self.collect_detailed_traces))
33023339

33033340
def compute_hash(self) -> str:
33043341
"""
@@ -3320,12 +3357,23 @@ def compute_hash(self) -> str:
33203357
return hash_str
33213358

33223359
def __post_init__(self):
3360+
if (self.collect_detailed_traces is not None
3361+
and len(self.collect_detailed_traces) == 1
3362+
and "," in self.collect_detailed_traces[0]):
3363+
self._parse_collect_detailed_traces()
3364+
33233365
if not is_otel_available() and self.otlp_traces_endpoint is not None:
33243366
raise ValueError(
33253367
"OpenTelemetry is not available. Unable to configure "
33263368
"'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
33273369
f"installed. Original error:\n{otel_import_error_traceback}")
33283370

3371+
def _parse_collect_detailed_traces(self):
3372+
assert isinstance(self.collect_detailed_traces, list)
3373+
self.collect_detailed_traces = cast(
3374+
list[DetailedTraceModules],
3375+
self.collect_detailed_traces[0].split(","))
3376+
33293377

33303378
class KVTransferConfig(BaseModel):
33313379
"""Configuration for distributed KV cache transfer."""

vllm/engine/arg_utils.py

Lines changed: 39 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,21 @@
77
import re
88
import threading
99
from dataclasses import MISSING, dataclass, fields
10+
from itertools import permutations
1011
from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
1112
TypeVar, Union, cast, get_args, get_origin)
1213

1314
import torch
1415
from typing_extensions import TypeIs, deprecated
1516

1617
import vllm.envs as envs
17-
from vllm import version
1818
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
19-
ConfigFormat, ConfigType, DecodingConfig, Device,
20-
DeviceConfig, DistributedExecutorBackend,
21-
GuidedDecodingBackend, GuidedDecodingBackendV1,
22-
HfOverrides, KVEventsConfig, KVTransferConfig,
23-
LoadConfig, LoadFormat, LoRAConfig, ModelConfig,
24-
ModelDType, ModelImpl, MultiModalConfig,
19+
ConfigFormat, ConfigType, DecodingConfig,
20+
DetailedTraceModules, Device, DeviceConfig,
21+
DistributedExecutorBackend, GuidedDecodingBackend,
22+
GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
23+
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
24+
ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
2525
ObservabilityConfig, ParallelConfig, PoolerConfig,
2626
PrefixCachingHashAlgo, PromptAdapterConfig,
2727
SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
@@ -41,8 +41,6 @@
4141

4242
logger = init_logger(__name__)
4343

44-
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
45-
4644
# object is used to allow for special typing forms
4745
T = TypeVar("T")
4846
TypeHint = Union[type[Any], object]
@@ -337,9 +335,12 @@ class EngineArgs:
337335
speculative_config: Optional[Dict[str, Any]] = None
338336

339337
qlora_adapter_name_or_path: Optional[str] = None
340-
show_hidden_metrics_for_version: Optional[str] = None
341-
otlp_traces_endpoint: Optional[str] = None
342-
collect_detailed_traces: Optional[str] = None
338+
show_hidden_metrics_for_version: Optional[str] = \
339+
ObservabilityConfig.show_hidden_metrics_for_version
340+
otlp_traces_endpoint: Optional[str] = \
341+
ObservabilityConfig.otlp_traces_endpoint
342+
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
343+
ObservabilityConfig.collect_detailed_traces
343344
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
344345
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
345346
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
@@ -677,33 +678,29 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
677678
default=None,
678679
help='Name or path of the QLoRA adapter.')
679680

680-
parser.add_argument('--show-hidden-metrics-for-version',
681-
type=str,
682-
default=None,
683-
help='Enable deprecated Prometheus metrics that '
684-
'have been hidden since the specified version. '
685-
'For example, if a previously deprecated metric '
686-
'has been hidden since the v0.7.0 release, you '
687-
'use --show-hidden-metrics-for-version=0.7 as a '
688-
'temporary escape hatch while you migrate to new '
689-
'metrics. The metric is likely to be removed '
690-
'completely in an upcoming release.')
691-
692-
parser.add_argument(
693-
'--otlp-traces-endpoint',
694-
type=str,
695-
default=None,
696-
help='Target URL to which OpenTelemetry traces will be sent.')
697-
parser.add_argument(
698-
'--collect-detailed-traces',
699-
type=str,
700-
default=None,
701-
help="Valid choices are " +
702-
",".join(ALLOWED_DETAILED_TRACE_MODULES) +
703-
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
704-
" set. If set, it will collect detailed traces for the specified "
705-
"modules. This involves use of possibly costly and or blocking "
706-
"operations and hence might have a performance impact.")
681+
# Observability arguments
682+
observability_kwargs = get_kwargs(ObservabilityConfig)
683+
observability_group = parser.add_argument_group(
684+
title="ObservabilityConfig",
685+
description=ObservabilityConfig.__doc__,
686+
)
687+
observability_group.add_argument(
688+
"--show-hidden-metrics-for-version",
689+
**observability_kwargs["show_hidden_metrics_for_version"])
690+
observability_group.add_argument(
691+
"--otlp-traces-endpoint",
692+
**observability_kwargs["otlp_traces_endpoint"])
693+
# TODO: generalise this special case
694+
choices = observability_kwargs["collect_detailed_traces"]["choices"]
695+
metavar = f"{{{','.join(choices)}}}"
696+
observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
697+
observability_kwargs["collect_detailed_traces"]["choices"] += [
698+
",".join(p)
699+
for p in permutations(get_args(DetailedTraceModules), r=2)
700+
]
701+
observability_group.add_argument(
702+
"--collect-detailed-traces",
703+
**observability_kwargs["collect_detailed_traces"])
707704

708705
# Scheduler arguments
709706
scheduler_kwargs = get_kwargs(SchedulerConfig)
@@ -1094,26 +1091,11 @@ def create_engine_config(
10941091
if self.enable_reasoning else None,
10951092
)
10961093

1097-
show_hidden_metrics = False
1098-
if self.show_hidden_metrics_for_version is not None:
1099-
show_hidden_metrics = version._prev_minor_version_was(
1100-
self.show_hidden_metrics_for_version)
1101-
1102-
detailed_trace_modules = []
1103-
if self.collect_detailed_traces is not None:
1104-
detailed_trace_modules = self.collect_detailed_traces.split(",")
1105-
for m in detailed_trace_modules:
1106-
if m not in ALLOWED_DETAILED_TRACE_MODULES:
1107-
raise ValueError(
1108-
f"Invalid module {m} in collect_detailed_traces. "
1109-
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
11101094
observability_config = ObservabilityConfig(
1111-
show_hidden_metrics=show_hidden_metrics,
1095+
show_hidden_metrics_for_version=self.
1096+
show_hidden_metrics_for_version,
11121097
otlp_traces_endpoint=self.otlp_traces_endpoint,
1113-
collect_model_forward_time="model" in detailed_trace_modules
1114-
or "all" in detailed_trace_modules,
1115-
collect_model_execute_time="worker" in detailed_trace_modules
1116-
or "all" in detailed_trace_modules,
1098+
collect_detailed_traces=self.collect_detailed_traces,
11171099
)
11181100

11191101
config = VllmConfig(

0 commit comments

Comments
 (0)