@@ -118,7 +118,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
@@ -341,6 +341,13 @@ steps:
   commands:
   - bash scripts/run-benchmarks.sh
 
+- label: Benchmarks CLI Test # 10min
+  source_file_dependencies:
+  - vllm/
+  - tests/benchmarks/
+  commands:
+  - pytest -v -s benchmarks/
+
 - label: Quantization Test # 33min
   source_file_dependencies:
   - csrc/
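# For reference: the new step reassembled from the `+` lines above, as it should
# read in .buildkite/test-pipeline.yaml once this hunk is applied. The step runs
# the benchmark CLI tests whenever vllm/ or tests/benchmarks/ changes.
- label: Benchmarks CLI Test # 10min
  source_file_dependencies:
  - vllm/
  - tests/benchmarks/
  commands:
  - pytest -v -s benchmarks/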
@@ -378,8 +385,10 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/tool_use
+  - tests/mistral_tool_use
   commands:
   - pytest -v -s tool_use
+  - pytest -v -s mistral_tool_use
 
 ##### models test #####
 
@@ -391,8 +400,9 @@ steps:
   - pytest -v -s models/test_transformers.py
   - pytest -v -s models/test_registry.py
   # V1 Test: https://github.com/vllm-project/vllm/issues/14531
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4'
+  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
   - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
+  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
 
 - label: Language Models Test (Standard) # 32min
   # mirror_hardwares: [amd]
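# For reference: the three test_initialization.py invocations reassembled as they
# should read once the hunk above is applied. plamo2, like llama4, now runs in
# its own pytest invocation and is excluded from the catch-all -k filter.
  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'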
@@ -402,6 +412,8 @@ steps:
   - tests/models/embedding/language
   - tests/models/encoder_decoder/language
   commands:
+  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
+  - pip install causal-conv1d
   - pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
   - pytest -v -s models/embedding/language -m core_model
 
@@ -413,6 +425,8 @@ steps:
   - tests/models/embedding/language
   - tests/models/encoder_decoder/language
   commands:
+  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
+  - pip install causal-conv1d
   - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
   - pytest -v -s models/embedding/language -m 'not core_model'
 
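# For reference: the Standard step's commands block reassembled as it should read
# once applied. causal-conv1d is installed at test time because, per the comment
# in the hunks above, it is not compatible with pip-compile; the Extended step
# gets the identical install with the inverse -m marker filter.
  commands:
  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
  - pip install causal-conv1d
  - pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
  - pytest -v -s models/embedding/language -m core_model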
@@ -538,6 +552,7 @@ steps:
   # - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
 
 - label: Plugin Tests (2 GPUs) # 40min
   working_dir: "/vllm-workspace/tests"
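# For reference: the tail of the two-GPU commands list reassembled as it should
# read once the last hunk is applied. Unlike the commands above it, the new
# v1/shutdown invocation does not force VLLM_USE_V1=0, so it runs against the
# V1 engine path that the neighboring tests opt out of.
  - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown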