Commit 3062207

chore: reduce diff
Signed-off-by: Aaron Pham <[email protected]>
1 parent fb1c551 commit 3062207

3 files changed: 16 additions & 18 deletions

docs/source/features/structured_outputs.md

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ You can see the complete list of supported parameters on the [OpenAI-Compatible
 
 Structured outputs are supported by default in the OpenAI-Compatible Server. You
 may choose to specify the backend to use by setting the
-`--structured-output-config '{"backend": '<supported_backend>'}'` flag to `vllm serve`. The default backend is `auto`,
+`--structured-output-config '{"backend": "<supported_backend>"}'` flag to `vllm serve`. The default backend is `auto`,
 which will try to choose an appropriate backend based on the details of the
 request. You may also choose a specific backend, along with
 some options. A full set of options is available in the `vllm serve --help`
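
The one-character doc fix matters because the value handed to `--structured-output-config` must survive shell quoting as valid JSON. A minimal sketch of the difference, using only the standard library (the backend placeholder below is illustrative):

    import json

    # Corrected form from this commit: the backend placeholder is double-quoted
    # inside the JSON value, so the string parses.
    json.loads('{"backend": "<supported_backend>"}')

    # Pre-fix form: the shell quoting left the placeholder unquoted inside the
    # JSON, which does not parse.
    try:
        json.loads('{"backend": <supported_backend>}')
    except json.JSONDecodeError as exc:
        print("rejected:", exc)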

tests/v1/entrypoints/llm/test_struct_output_generate.py

Lines changed: 11 additions & 13 deletions

@@ -105,17 +105,15 @@ def test_structured_output(
     enforce_eager = bool(not current_platform.is_tpu())
     # Use a single LLM instance for several scenarios to
     # speed up the test suite.
-    llm = LLM(
-        model=model_name,
-        enforce_eager=enforce_eager,
-        max_model_len=1024,
-        tokenizer_mode=tokenizer_mode,
-        speculative_config=speculative_config,
-        structured_output_config={
-            "backend": structured_output_backend,
-            "disable_any_whitespace": True,
-        },
-    )
+    llm = LLM(model=model_name,
+              enforce_eager=enforce_eager,
+              max_model_len=1024,
+              structured_output_config={
+                  "backend": structured_output_backend,
+                  "disable_any_whitespace": True,
+              },
+              tokenizer_mode=tokenizer_mode,
+              speculative_config=speculative_config)
 
     #
     # Test 1: Generate JSON output based on a provided schema

@@ -619,8 +617,8 @@ def test_structured_output_auto_mode(
 
     llm = LLM(model=model_name,
               max_model_len=1024,
-              tokenizer_mode=tokenizer_mode,
-              structured_output_config={"backend": "auto"})
+              structured_output_config={"backend": "auto"},
+              tokenizer_mode=tokenizer_mode)
 
     sampling_params = SamplingParams(
         temperature=1.0,
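
For context on how such an instance is exercised, here is a minimal sketch of a structured-output request against an `LLM` configured this way. It assumes the `GuidedDecodingParams` request path these tests use; the model name, schema, and prompt are placeholders rather than values from the diff:

    from vllm import LLM, SamplingParams
    from vllm.sampling_params import GuidedDecodingParams

    # Placeholder schema and prompt; only the constructor kwargs mirror the diff above.
    schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}},
        "required": ["name"],
    }
    llm = LLM(model="<model_name>",
              max_model_len=1024,
              structured_output_config={"backend": "auto"})
    sampling_params = SamplingParams(
        temperature=1.0,
        max_tokens=100,
        guided_decoding=GuidedDecodingParams(json=schema))
    outputs = llm.generate(["Produce a JSON object with a name field."],
                           sampling_params=sampling_params)
    print(outputs[0].outputs[0].text)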

vllm/engine/llm_engine.py

Lines changed: 4 additions & 4 deletions

@@ -2090,20 +2090,20 @@ def _build_logits_processors(
                 "LLMEngine. Params: %s", guided_decoding)
 
             tokenizer = self.get_tokenizer(lora_request=lora_request)
-            guided_decoding.backend = guided_decoding.backend or self.structured_output_config.backend  # noqa: E501
+            guided_decoding.backend = guided_decoding.backend or \
+                self.structured_output_config.backend
 
             if self.structured_output_config.reasoning_backend:
                 logger.debug("Building with reasoning backend %s",
                              self.structured_output_config.reasoning_backend)
 
-            # yapf: disable
             processor = get_local_guided_decoding_logits_processor(
                 guided_params=guided_decoding,
                 tokenizer=tokenizer,
                 model_config=self.model_config,
-                reasoning_backend=self.structured_output_config.reasoning_backend,
+                reasoning_backend=self.structured_output_config.
+                reasoning_backend,
             )
-            # yapf: enable
             if processor:
                 logits_processors.append(processor)
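
The only behavioral logic touched by this hunk is unchanged by the rewrap: a backend set on the request wins, otherwise the engine-wide structured output backend is used. A standalone sketch of that fallback pattern (variable names are illustrative, not the engine's):

    # Prefer the per-request backend; fall back to the engine-wide default.
    engine_default_backend = "auto"
    request_backend = None  # request did not specify a backend

    effective_backend = request_backend or engine_default_backend
    assert effective_backend == "auto"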
