fix: leaked thread from test_engine_core_client_asyncio

alec-flowers · alec-flowers · commit 511761796b51 · 2025-04-29T13:52:30.000-07:00
Signed-off-by: alec-flowers <aflowers@nvidia.com>
diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py
@@ -8,7 +8,6 @@
 
 import psutil
 import pytest
-import zmq
 from transformers import AutoTokenizer
 
 from vllm import SamplingParams
@@ -201,54 +200,57 @@ async def test_engine_core_client_asyncio(monkeypatch: pytest.MonkeyPatch):
             log_stats=True,
         )
 
-        MAX_TOKENS = 20
-        params = SamplingParams(max_tokens=MAX_TOKENS)
-        """Normal Request Cycle."""
-
-        requests = [make_request(params) for _ in range(10)]
-        request_ids = [req.request_id for req in requests]
-
-        # Add requests to the engine.
-        for request in requests:
-            await client.add_request_async(request)
-            await asyncio.sleep(0.01)
-
-        outputs: dict[str, list] = {req_id: [] for req_id in request_ids}
-        await loop_until_done_async(client, outputs)
+        try:
+            MAX_TOKENS = 20
+            params = SamplingParams(max_tokens=MAX_TOKENS)
+            """Normal Request Cycle."""
 
-        for req_id in request_ids:
-            assert len(outputs[req_id]) == MAX_TOKENS, (
-                f"{outputs[req_id]=}, {MAX_TOKENS=}")
-        """Abort Request Cycle."""
+            requests = [make_request(params) for _ in range(10)]
+            request_ids = [req.request_id for req in requests]
 
-        # Add requests to the engine.
-        for idx, request in enumerate(requests):
-            await client.add_request_async(request)
-            await asyncio.sleep(0.01)
-            if idx % 2 == 0:
-                await client.abort_requests_async([request.request_id])
+            # Add requests to the engine.
+            for request in requests:
+                await client.add_request_async(request)
+                await asyncio.sleep(0.01)
 
-        outputs = {req_id: [] for req_id in request_ids}
-        await loop_until_done_async(client, outputs)
+            outputs: dict[str, list] = {req_id: [] for req_id in request_ids}
+            await loop_until_done_async(client, outputs)
 
-        for idx, req_id in enumerate(request_ids):
-            if idx % 2 == 0:
-                assert len(outputs[req_id]) < MAX_TOKENS, (
-                    f"{len(outputs[req_id])=}, {MAX_TOKENS=}")
-            else:
+            for req_id in request_ids:
                 assert len(outputs[req_id]) == MAX_TOKENS, (
-                    f"{len(outputs[req_id])=}, {MAX_TOKENS=}")
-        """Utility method invocation"""
+                    f"{outputs[req_id]=}, {MAX_TOKENS=}")
+            """Abort Request Cycle."""
+
+            # Add requests to the engine.
+            for idx, request in enumerate(requests):
+                await client.add_request_async(request)
+                await asyncio.sleep(0.01)
+                if idx % 2 == 0:
+                    await client.abort_requests_async([request.request_id])
+
+            outputs = {req_id: [] for req_id in request_ids}
+            await loop_until_done_async(client, outputs)
+
+            for idx, req_id in enumerate(request_ids):
+                if idx % 2 == 0:
+                    assert len(outputs[req_id]) < MAX_TOKENS, (
+                        f"{len(outputs[req_id])=}, {MAX_TOKENS=}")
+                else:
+                    assert len(outputs[req_id]) == MAX_TOKENS, (
+                        f"{len(outputs[req_id])=}, {MAX_TOKENS=}")
+            """Utility method invocation"""
 
-        core_client: AsyncMPClient = client
+            core_client: AsyncMPClient = client
 
-        result = await core_client.call_utility_async("echo", "testarg")
-        assert result == "testarg"
+            result = await core_client.call_utility_async("echo", "testarg")
+            assert result == "testarg"
 
-        with pytest.raises(Exception) as e_info:
-            await core_client.call_utility_async("echo", None, "help!")
+            with pytest.raises(Exception) as e_info:
+                await core_client.call_utility_async("echo", None, "help!")
 
-        assert str(e_info.value) == "Call to echo method failed: help!"
+            assert str(e_info.value) == "Call to echo method failed: help!"
+        finally:
+            client.shutdown()
 
 
 @pytest.mark.parametrize(
@@ -333,10 +335,6 @@ def test_kv_cache_events(
                 "Token ids should be the same as the custom tokens")
         finally:
             client.shutdown()
-            subscriber.close()
-            # TODO hack to try and fix CI hang
-            ctx = zmq.Context.instance()
-            ctx.term()
         return