|
7 | 7 | import re
|
8 | 8 | import threading
|
9 | 9 | from dataclasses import MISSING, dataclass, fields
|
| 10 | +from itertools import permutations |
10 | 11 | from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
|
11 | 12 | TypeVar, Union, cast, get_args, get_origin)
|
12 | 13 |
|
13 | 14 | import torch
|
14 | 15 | from typing_extensions import TypeIs, deprecated
|
15 | 16 |
|
16 | 17 | import vllm.envs as envs
|
17 |
| -from vllm import version |
18 | 18 | from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
|
19 |
| - ConfigFormat, ConfigType, DecodingConfig, Device, |
20 |
| - DeviceConfig, DistributedExecutorBackend, |
21 |
| - GuidedDecodingBackend, GuidedDecodingBackendV1, |
22 |
| - HfOverrides, KVEventsConfig, KVTransferConfig, |
23 |
| - LoadConfig, LoadFormat, LoRAConfig, ModelConfig, |
24 |
| - ModelDType, ModelImpl, MultiModalConfig, |
| 19 | + ConfigFormat, ConfigType, DecodingConfig, |
| 20 | + DetailedTraceModules, Device, DeviceConfig, |
| 21 | + DistributedExecutorBackend, GuidedDecodingBackend, |
| 22 | + GuidedDecodingBackendV1, HfOverrides, KVEventsConfig, |
| 23 | + KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig, |
| 24 | + ModelConfig, ModelDType, ModelImpl, MultiModalConfig, |
25 | 25 | ObservabilityConfig, ParallelConfig, PoolerConfig,
|
26 | 26 | PrefixCachingHashAlgo, PromptAdapterConfig,
|
27 | 27 | SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
|
|
41 | 41 |
|
42 | 42 | logger = init_logger(__name__)
|
43 | 43 |
|
44 |
| -ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"] |
45 |
| - |
46 | 44 | # object is used to allow for special typing forms
|
47 | 45 | T = TypeVar("T")
|
48 | 46 | TypeHint = Union[type[Any], object]
|
@@ -337,9 +335,12 @@ class EngineArgs:
|
337 | 335 | speculative_config: Optional[Dict[str, Any]] = None
|
338 | 336 |
|
339 | 337 | qlora_adapter_name_or_path: Optional[str] = None
|
340 |
| - show_hidden_metrics_for_version: Optional[str] = None |
341 |
| - otlp_traces_endpoint: Optional[str] = None |
342 |
| - collect_detailed_traces: Optional[str] = None |
| 338 | + show_hidden_metrics_for_version: Optional[str] = \ |
| 339 | + ObservabilityConfig.show_hidden_metrics_for_version |
| 340 | + otlp_traces_endpoint: Optional[str] = \ |
| 341 | + ObservabilityConfig.otlp_traces_endpoint |
| 342 | + collect_detailed_traces: Optional[list[DetailedTraceModules]] = \ |
| 343 | + ObservabilityConfig.collect_detailed_traces |
343 | 344 | disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
|
344 | 345 | scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
|
345 | 346 | scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
|
@@ -677,33 +678,29 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
677 | 678 | default=None,
|
678 | 679 | help='Name or path of the QLoRA adapter.')
|
679 | 680 |
|
680 |
| - parser.add_argument('--show-hidden-metrics-for-version', |
681 |
| - type=str, |
682 |
| - default=None, |
683 |
| - help='Enable deprecated Prometheus metrics that ' |
684 |
| - 'have been hidden since the specified version. ' |
685 |
| - 'For example, if a previously deprecated metric ' |
686 |
| - 'has been hidden since the v0.7.0 release, you ' |
687 |
| - 'use --show-hidden-metrics-for-version=0.7 as a ' |
688 |
| - 'temporary escape hatch while you migrate to new ' |
689 |
| - 'metrics. The metric is likely to be removed ' |
690 |
| - 'completely in an upcoming release.') |
691 |
| - |
692 |
| - parser.add_argument( |
693 |
| - '--otlp-traces-endpoint', |
694 |
| - type=str, |
695 |
| - default=None, |
696 |
| - help='Target URL to which OpenTelemetry traces will be sent.') |
697 |
| - parser.add_argument( |
698 |
| - '--collect-detailed-traces', |
699 |
| - type=str, |
700 |
| - default=None, |
701 |
| - help="Valid choices are " + |
702 |
| - ",".join(ALLOWED_DETAILED_TRACE_MODULES) + |
703 |
| - ". It makes sense to set this only if ``--otlp-traces-endpoint`` is" |
704 |
| - " set. If set, it will collect detailed traces for the specified " |
705 |
| - "modules. This involves use of possibly costly and or blocking " |
706 |
| - "operations and hence might have a performance impact.") |
| 681 | + # Observability arguments |
| 682 | + observability_kwargs = get_kwargs(ObservabilityConfig) |
| 683 | + observability_group = parser.add_argument_group( |
| 684 | + title="ObservabilityConfig", |
| 685 | + description=ObservabilityConfig.__doc__, |
| 686 | + ) |
| 687 | + observability_group.add_argument( |
| 688 | + "--show-hidden-metrics-for-version", |
| 689 | + **observability_kwargs["show_hidden_metrics_for_version"]) |
| 690 | + observability_group.add_argument( |
| 691 | + "--otlp-traces-endpoint", |
| 692 | + **observability_kwargs["otlp_traces_endpoint"]) |
| 693 | + # TODO: generalise this special case |
| 694 | + choices = observability_kwargs["collect_detailed_traces"]["choices"] |
| 695 | + metavar = f"{{{','.join(choices)}}}" |
| 696 | + observability_kwargs["collect_detailed_traces"]["metavar"] = metavar |
| 697 | + observability_kwargs["collect_detailed_traces"]["choices"] += [ |
| 698 | + ",".join(p) |
| 699 | + for p in permutations(get_args(DetailedTraceModules), r=2) |
| 700 | + ] |
| 701 | + observability_group.add_argument( |
| 702 | + "--collect-detailed-traces", |
| 703 | + **observability_kwargs["collect_detailed_traces"]) |
707 | 704 |
|
708 | 705 | # Scheduler arguments
|
709 | 706 | scheduler_kwargs = get_kwargs(SchedulerConfig)
|
@@ -1094,26 +1091,11 @@ def create_engine_config(
|
1094 | 1091 | if self.enable_reasoning else None,
|
1095 | 1092 | )
|
1096 | 1093 |
|
1097 |
| - show_hidden_metrics = False |
1098 |
| - if self.show_hidden_metrics_for_version is not None: |
1099 |
| - show_hidden_metrics = version._prev_minor_version_was( |
1100 |
| - self.show_hidden_metrics_for_version) |
1101 |
| - |
1102 |
| - detailed_trace_modules = [] |
1103 |
| - if self.collect_detailed_traces is not None: |
1104 |
| - detailed_trace_modules = self.collect_detailed_traces.split(",") |
1105 |
| - for m in detailed_trace_modules: |
1106 |
| - if m not in ALLOWED_DETAILED_TRACE_MODULES: |
1107 |
| - raise ValueError( |
1108 |
| - f"Invalid module {m} in collect_detailed_traces. " |
1109 |
| - f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}") |
1110 | 1094 | observability_config = ObservabilityConfig(
|
1111 |
| - show_hidden_metrics=show_hidden_metrics, |
| 1095 | + show_hidden_metrics_for_version=self. |
| 1096 | + show_hidden_metrics_for_version, |
1112 | 1097 | otlp_traces_endpoint=self.otlp_traces_endpoint,
|
1113 |
| - collect_model_forward_time="model" in detailed_trace_modules |
1114 |
| - or "all" in detailed_trace_modules, |
1115 |
| - collect_model_execute_time="worker" in detailed_trace_modules |
1116 |
| - or "all" in detailed_trace_modules, |
| 1098 | + collect_detailed_traces=self.collect_detailed_traces, |
1117 | 1099 | )
|
1118 | 1100 |
|
1119 | 1101 | config = VllmConfig(
|
|
0 commit comments