From a800cb5b7ccaa41d31a49d221d69b1a5006c6ac2 Mon Sep 17 00:00:00 2001 From: DarkLight1337 Date: Wed, 30 Apr 2025 08:21:58 +0000 Subject: [PATCH 01/10] [CI/Build] Reorganize models tests Signed-off-by: DarkLight1337 --- .buildkite/test-pipeline.yaml | 76 ++++++++----------- tests/conftest.py | 2 +- tests/entrypoints/openai/test_embedding.py | 12 +-- .../openai/test_embedding_dimensions.py | 5 +- tests/models/embedding/language/__init__.py | 0 tests/models/embedding/utils.py | 66 ---------------- .../embedding/vision_language/__init__.py | 0 tests/models/encoder_decoder/__init__.py | 0 .../audio_language/__init__.py | 0 .../encoder_decoder/language/__init__.py | 0 .../vision_language/__init__.py | 0 .../{decoder_only => language}/__init__.py | 0 .../generation}/__init__.py | 0 .../generation}/test_aqlm.py | 0 .../generation}/test_bart.py | 0 .../generation}/test_fp8.py | 0 .../generation}/test_gguf.py | 0 .../generation}/test_gptq_marlin.py | 0 .../generation}/test_gptq_marlin_24.py | 0 .../generation}/test_granite.py | 0 .../generation}/test_hybrid.py | 0 .../generation}/test_mistral.py | 0 .../generation}/test_modelopt.py | 0 .../generation}/test_models.py | 0 .../generation}/test_nvfp4.py | 0 .../generation}/test_phimoe.py | 0 .../language => language/pooling}/__init__.py | 0 .../pooling}/test_cls_models.py | 0 .../pooling}/test_embedding.py | 2 +- .../pooling}/test_gritlm.py | 0 .../pooling}/test_jina.py | 3 +- .../pooling}/test_scoring.py | 0 .../pooling}/test_snowflake_arctic_embed.py | 4 +- .../pooling}/test_truncation_control.py | 0 .../generation}/__init__.py | 0 .../generation}/test_awq.py | 0 .../generation}/test_broadcast.py | 0 .../generation/test_common.py} | 0 .../generation}/test_florence2.py | 0 .../generation}/test_granite_speech.py | 0 .../generation}/test_interleaved.py | 1 + .../generation}/test_intern_vit.py | 0 .../generation}/test_mllama.py | 0 .../generation}/test_phi4mm.py | 0 .../generation}/test_pixtral.py | 0 .../generation}/test_qwen2_vl.py | 0 .../generation}/test_ultravox.py | 0 .../generation}/test_whisper.py | 0 .../generation}/vlm_utils/__init__.py | 0 .../generation}/vlm_utils/builders.py | 0 .../generation}/vlm_utils/case_filtering.py | 0 .../generation}/vlm_utils/core.py | 0 .../generation}/vlm_utils/custom_inputs.py | 0 .../generation}/vlm_utils/model_utils.py | 0 .../generation}/vlm_utils/runners.py | 0 .../generation}/vlm_utils/types.py | 0 .../pooling}/__init__.py | 0 .../pooling}/test_dse_qwen2_vl.py | 2 +- .../pooling}/test_llava_next.py | 2 +- .../pooling}/test_phi3v.py | 2 +- tests/models/utils.py | 64 +++++++++++++++- 61 files changed, 111 insertions(+), 130 deletions(-) delete mode 100644 tests/models/embedding/language/__init__.py delete mode 100644 tests/models/embedding/utils.py delete mode 100644 tests/models/embedding/vision_language/__init__.py delete mode 100644 tests/models/encoder_decoder/__init__.py delete mode 100644 tests/models/encoder_decoder/audio_language/__init__.py delete mode 100644 tests/models/encoder_decoder/language/__init__.py delete mode 100644 tests/models/encoder_decoder/vision_language/__init__.py rename tests/models/{decoder_only => language}/__init__.py (100%) rename tests/models/{decoder_only/audio_language => language/generation}/__init__.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_aqlm.py (100%) rename tests/models/{encoder_decoder/language => language/generation}/test_bart.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_fp8.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_gguf.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_gptq_marlin.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_gptq_marlin_24.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_granite.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_hybrid.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_mistral.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_modelopt.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_models.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_nvfp4.py (100%) rename tests/models/{decoder_only/language => language/generation}/test_phimoe.py (100%) rename tests/models/{decoder_only/language => language/pooling}/__init__.py (100%) rename tests/models/{embedding/language => language/pooling}/test_cls_models.py (100%) rename tests/models/{embedding/language => language/pooling}/test_embedding.py (98%) rename tests/models/{embedding/language => language/pooling}/test_gritlm.py (100%) rename tests/models/{embedding/language => language/pooling}/test_jina.py (98%) rename tests/models/{embedding/language => language/pooling}/test_scoring.py (100%) rename tests/models/{embedding/language => language/pooling}/test_snowflake_arctic_embed.py (97%) rename tests/models/{embedding/language => language/pooling}/test_truncation_control.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/__init__.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_awq.py (100%) rename tests/models/{encoder_decoder/vision_language => multimodal/generation}/test_broadcast.py (100%) rename tests/models/{decoder_only/vision_language/test_models.py => multimodal/generation/test_common.py} (100%) rename tests/models/{encoder_decoder/vision_language => multimodal/generation}/test_florence2.py (100%) rename tests/models/{decoder_only/audio_language => multimodal/generation}/test_granite_speech.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_interleaved.py (99%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_intern_vit.py (100%) rename tests/models/{encoder_decoder/vision_language => multimodal/generation}/test_mllama.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_phi4mm.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_pixtral.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/test_qwen2_vl.py (100%) rename tests/models/{decoder_only/audio_language => multimodal/generation}/test_ultravox.py (100%) rename tests/models/{encoder_decoder/audio_language => multimodal/generation}/test_whisper.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/__init__.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/builders.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/case_filtering.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/core.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/custom_inputs.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/model_utils.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/runners.py (100%) rename tests/models/{decoder_only/vision_language => multimodal/generation}/vlm_utils/types.py (100%) rename tests/models/{embedding => multimodal/pooling}/__init__.py (100%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_dse_qwen2_vl.py (99%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_llava_next.py (99%) rename tests/models/{embedding/vision_language => multimodal/pooling}/test_phi3v.py (98%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 8da43322c5c..03205e1d3e4 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -446,77 +446,62 @@ steps: - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4' - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2' -- label: Language Models Test (Standard) # 32min +- label: Language Models Test (Standard) #mirror_hardwares: [amd] source_file_dependencies: - vllm/ - - tests/models/decoder_only/language - - tests/models/embedding/language - - tests/models/encoder_decoder/language + - tests/models/language commands: # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' - - pytest -v -s models/decoder_only/language -m 'core_model or quant_model' - - pytest -v -s models/embedding/language -m core_model + - pytest -v -s models/language -m 'core_model or quant_model' -- label: Language Models Test (Extended) # 1h10min +- label: Language Models Test (Extended) optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/language - - tests/models/embedding/language - - tests/models/encoder_decoder/language + - tests/models/language commands: # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile. - - pip install causal-conv1d - - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model' - - pytest -v -s models/embedding/language -m 'not core_model' + - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8' + - pytest -v -s models/language -m 'not core_model and not quant_model' -- label: Multi-Modal Models Test (Standard) # 40min +- label: Multi-Modal Models Test (Standard) #mirror_hardwares: [amd] source_file_dependencies: - vllm/ - - tests/models/decoder_only/audio_language - - tests/models/decoder_only/vision_language - - tests/models/embedding/vision_language - - tests/models/encoder_decoder/audio_language - - tests/models/encoder_decoder/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/multimodal - - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model' - - pytest -v -s models/decoder_only/vision_language -m 'core_model or quant_model' - - pytest -v -s models/embedding/vision_language -m core_model - - pytest -v -s models/encoder_decoder/audio_language -m core_model - - pytest -v -s models/encoder_decoder/language -m core_model - - pytest -v -s models/encoder_decoder/vision_language -m core_model - - pytest -v -s models/decoder_only/vision_language/test_interleaved.py - -- label: Multi-Modal Models Test (Extended) 1 # 48m + - pytest -v -s models/multimodal -m 'core_model or quant_model' + - pytest -v -s models/multimodal/processing + +- label: Multi-Modal Models Test (Extended) 1 optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/audio_language - - tests/models/decoder_only/vision_language - - tests/models/embedding/vision_language - - tests/models/encoder_decoder/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model' - - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model' - - pytest -v -s --ignore models/decoder_only/vision_language/test_models.py models/decoder_only/vision_language -m 'not core_model and not quant_model' - - pytest -v -s models/embedding/vision_language -m 'not core_model' - - pytest -v -s models/encoder_decoder/language -m 'not core_model' - - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model' - -- label: Multi-Modal Models Test (Extended) 2 # 38m + - pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model and not quant_model' + +- label: Multi-Modal Models Test (Extended) 2 + optional: true + source_file_dependencies: + - vllm/ + - tests/models/multimodal + commands: + - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git + - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model and not quant_model' + +- label: Multi-Modal Models Test (Extended) 3 optional: true source_file_dependencies: - vllm/ - - tests/models/decoder_only/vision_language + - tests/models/multimodal commands: - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git - - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=1) and not core_model and not quant_model' + - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model and not quant_model' # This test is used only in PR development phase to test individual models and should never run on main - label: Custom Models Test @@ -586,9 +571,8 @@ steps: - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)' # Avoid importing model tests that cause CUDA reinitialization error - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/encoder_decoder/language/test_bart.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)' - - pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)' + - pytest models/language -v -s -m 'distributed(num_gpus=2)' + - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' # test sequence parallel - pytest -v -s distributed/test_sequence_parallel.py # this test fails consistently. diff --git a/tests/conftest.py b/tests/conftest.py index f02b5a8c052..00bb6a442c6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -707,7 +707,7 @@ def generate_encoder_decoder_greedy_logprobs_limit( return [(output_ids, output_str, output_logprobs) for output_ids, output_str, output_logprobs in outputs] - def encode(self, prompts: list[str], *args, + def encode(self, prompts: list[list[str]], *args, **kwargs) -> list[list[torch.Tensor]]: return self.model.encode(prompts, *args, **kwargs) diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/openai/test_embedding.py index 50b20e78c4c..1019bfd5893 100644 --- a/tests/entrypoints/openai/test_embedding.py +++ b/tests/entrypoints/openai/test_embedding.py @@ -11,7 +11,7 @@ from vllm.entrypoints.openai.protocol import EmbeddingResponse from vllm.transformers_utils.tokenizer import get_tokenizer -from ...models.embedding.utils import correctness_test +from ...models.utils import run_embedding_correctness_test from ...utils import RemoteOpenAIServer MODEL_NAME = "intfloat/multilingual-e5-small" @@ -76,7 +76,7 @@ async def test_single_embedding(hf_model, client: openai.AsyncOpenAI, assert embeddings.usage.total_tokens == 11 vllm_outputs = [d.embedding for d in embeddings.data] - correctness_test(hf_model, input_texts, vllm_outputs) + run_embedding_correctness_test(hf_model, input_texts, vllm_outputs) # test using token IDs input_tokens = [1, 1, 1, 1, 1] @@ -121,7 +121,7 @@ async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI, assert embeddings.usage.total_tokens == 33 vllm_outputs = [d.embedding for d in embeddings.data] - correctness_test(hf_model, input_texts, vllm_outputs) + run_embedding_correctness_test(hf_model, input_texts, vllm_outputs) # test list[list[int]] input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24], @@ -208,7 +208,7 @@ async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI, model=model_name, encoding_format="float") float_data = [d.embedding for d in responses_float.data] - correctness_test(hf_model, input_texts, float_data) + run_embedding_correctness_test(hf_model, input_texts, float_data) responses_base64 = await client.embeddings.create(input=input_texts, model=model_name, @@ -219,13 +219,13 @@ async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI, np.frombuffer(base64.b64decode(data.embedding), dtype="float32").tolist()) - correctness_test(hf_model, input_texts, base64_data) + run_embedding_correctness_test(hf_model, input_texts, base64_data) # Default response is float32 decoded from base64 by OpenAI Client responses_default = await client.embeddings.create(input=input_texts, model=model_name) default_data = [d.embedding for d in responses_default.data] - correctness_test(hf_model, input_texts, default_data) + run_embedding_correctness_test(hf_model, input_texts, default_data) @pytest.mark.asyncio diff --git a/tests/entrypoints/openai/test_embedding_dimensions.py b/tests/entrypoints/openai/test_embedding_dimensions.py index 9f5a8c6839b..332fa332a4a 100644 --- a/tests/entrypoints/openai/test_embedding_dimensions.py +++ b/tests/entrypoints/openai/test_embedding_dimensions.py @@ -11,7 +11,7 @@ from vllm.entrypoints.openai.protocol import EmbeddingResponse from ...conftest import HfRunner -from ...models.embedding.utils import EmbedModelInfo, correctness_test +from ...models.utils import EmbedModelInfo, run_embedding_correctness_test from ...utils import RemoteOpenAIServer MODELS = [ @@ -95,7 +95,8 @@ async def make_request_and_correctness_test(dimensions): assert len(embeddings.data[0].embedding) == dimensions vllm_outputs = [d.embedding for d in embeddings.data] - correctness_test(hf_model, prompts, vllm_outputs, dimensions) + run_embedding_correctness_test(hf_model, prompts, vllm_outputs, + dimensions) if model_info.is_matryoshka: valid_dimensions: list[Optional[int]] = [None] diff --git a/tests/models/embedding/language/__init__.py b/tests/models/embedding/language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/embedding/utils.py b/tests/models/embedding/utils.py deleted file mode 100644 index 6d4df2c265c..00000000000 --- a/tests/models/embedding/utils.py +++ /dev/null @@ -1,66 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -from collections.abc import Sequence -from typing import NamedTuple, Optional - -import torch -import torch.nn.functional as F - - -def check_embeddings_close( - *, - embeddings_0_lst: Sequence[list[float]], - embeddings_1_lst: Sequence[list[float]], - name_0: str, - name_1: str, - tol: float = 1e-3, -) -> None: - assert len(embeddings_0_lst) == len(embeddings_1_lst) - - for prompt_idx, (embeddings_0, embeddings_1) in enumerate( - zip(embeddings_0_lst, embeddings_1_lst)): - assert len(embeddings_0) == len(embeddings_1), ( - f"Length mismatch: {len(embeddings_0)} vs. {len(embeddings_1)}") - - sim = F.cosine_similarity(torch.tensor(embeddings_0), - torch.tensor(embeddings_1), - dim=0) - - fail_msg = (f"Test{prompt_idx}:" - f"\n{name_0}:\t{embeddings_0[:16]!r}" - f"\n{name_1}:\t{embeddings_1[:16]!r}") - - assert sim >= 1 - tol, fail_msg - - -def matryoshka_fy(tensor, dimensions): - tensor = torch.tensor(tensor) - tensor = tensor[..., :dimensions] - tensor = F.normalize(tensor, p=2, dim=1) - return tensor - - -class EmbedModelInfo(NamedTuple): - name: str - is_matryoshka: bool - matryoshka_dimensions: Optional[list[int]] = None - architecture: str = "" - enable_test: bool = True - - -def correctness_test(hf_model, - inputs, - vllm_outputs: Sequence[list[float]], - dimensions: Optional[int] = None): - - hf_outputs = hf_model.encode(inputs) - if dimensions: - hf_outputs = matryoshka_fy(hf_outputs, dimensions) - - check_embeddings_close( - embeddings_0_lst=hf_outputs, - embeddings_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - tol=1e-2, - ) diff --git a/tests/models/embedding/vision_language/__init__.py b/tests/models/embedding/vision_language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/__init__.py b/tests/models/encoder_decoder/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/audio_language/__init__.py b/tests/models/encoder_decoder/audio_language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/language/__init__.py b/tests/models/encoder_decoder/language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/encoder_decoder/vision_language/__init__.py b/tests/models/encoder_decoder/vision_language/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/models/decoder_only/__init__.py b/tests/models/language/__init__.py similarity index 100% rename from tests/models/decoder_only/__init__.py rename to tests/models/language/__init__.py diff --git a/tests/models/decoder_only/audio_language/__init__.py b/tests/models/language/generation/__init__.py similarity index 100% rename from tests/models/decoder_only/audio_language/__init__.py rename to tests/models/language/generation/__init__.py diff --git a/tests/models/decoder_only/language/test_aqlm.py b/tests/models/language/generation/test_aqlm.py similarity index 100% rename from tests/models/decoder_only/language/test_aqlm.py rename to tests/models/language/generation/test_aqlm.py diff --git a/tests/models/encoder_decoder/language/test_bart.py b/tests/models/language/generation/test_bart.py similarity index 100% rename from tests/models/encoder_decoder/language/test_bart.py rename to tests/models/language/generation/test_bart.py diff --git a/tests/models/decoder_only/language/test_fp8.py b/tests/models/language/generation/test_fp8.py similarity index 100% rename from tests/models/decoder_only/language/test_fp8.py rename to tests/models/language/generation/test_fp8.py diff --git a/tests/models/decoder_only/language/test_gguf.py b/tests/models/language/generation/test_gguf.py similarity index 100% rename from tests/models/decoder_only/language/test_gguf.py rename to tests/models/language/generation/test_gguf.py diff --git a/tests/models/decoder_only/language/test_gptq_marlin.py b/tests/models/language/generation/test_gptq_marlin.py similarity index 100% rename from tests/models/decoder_only/language/test_gptq_marlin.py rename to tests/models/language/generation/test_gptq_marlin.py diff --git a/tests/models/decoder_only/language/test_gptq_marlin_24.py b/tests/models/language/generation/test_gptq_marlin_24.py similarity index 100% rename from tests/models/decoder_only/language/test_gptq_marlin_24.py rename to tests/models/language/generation/test_gptq_marlin_24.py diff --git a/tests/models/decoder_only/language/test_granite.py b/tests/models/language/generation/test_granite.py similarity index 100% rename from tests/models/decoder_only/language/test_granite.py rename to tests/models/language/generation/test_granite.py diff --git a/tests/models/decoder_only/language/test_hybrid.py b/tests/models/language/generation/test_hybrid.py similarity index 100% rename from tests/models/decoder_only/language/test_hybrid.py rename to tests/models/language/generation/test_hybrid.py diff --git a/tests/models/decoder_only/language/test_mistral.py b/tests/models/language/generation/test_mistral.py similarity index 100% rename from tests/models/decoder_only/language/test_mistral.py rename to tests/models/language/generation/test_mistral.py diff --git a/tests/models/decoder_only/language/test_modelopt.py b/tests/models/language/generation/test_modelopt.py similarity index 100% rename from tests/models/decoder_only/language/test_modelopt.py rename to tests/models/language/generation/test_modelopt.py diff --git a/tests/models/decoder_only/language/test_models.py b/tests/models/language/generation/test_models.py similarity index 100% rename from tests/models/decoder_only/language/test_models.py rename to tests/models/language/generation/test_models.py diff --git a/tests/models/decoder_only/language/test_nvfp4.py b/tests/models/language/generation/test_nvfp4.py similarity index 100% rename from tests/models/decoder_only/language/test_nvfp4.py rename to tests/models/language/generation/test_nvfp4.py diff --git a/tests/models/decoder_only/language/test_phimoe.py b/tests/models/language/generation/test_phimoe.py similarity index 100% rename from tests/models/decoder_only/language/test_phimoe.py rename to tests/models/language/generation/test_phimoe.py diff --git a/tests/models/decoder_only/language/__init__.py b/tests/models/language/pooling/__init__.py similarity index 100% rename from tests/models/decoder_only/language/__init__.py rename to tests/models/language/pooling/__init__.py diff --git a/tests/models/embedding/language/test_cls_models.py b/tests/models/language/pooling/test_cls_models.py similarity index 100% rename from tests/models/embedding/language/test_cls_models.py rename to tests/models/language/pooling/test_cls_models.py diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/language/pooling/test_embedding.py similarity index 98% rename from tests/models/embedding/language/test_embedding.py rename to tests/models/language/pooling/test_embedding.py index 5deb35fa321..2a90f47af54 100644 --- a/tests/models/embedding/language/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -8,7 +8,7 @@ from vllm.config import PoolerConfig from vllm.platforms import current_platform -from ..utils import check_embeddings_close +from ...utils import check_embeddings_close @pytest.mark.parametrize( diff --git a/tests/models/embedding/language/test_gritlm.py b/tests/models/language/pooling/test_gritlm.py similarity index 100% rename from tests/models/embedding/language/test_gritlm.py rename to tests/models/language/pooling/test_gritlm.py diff --git a/tests/models/embedding/language/test_jina.py b/tests/models/language/pooling/test_jina.py similarity index 98% rename from tests/models/embedding/language/test_jina.py rename to tests/models/language/pooling/test_jina.py index 1e234368f3b..154aefe594a 100644 --- a/tests/models/embedding/language/test_jina.py +++ b/tests/models/language/pooling/test_jina.py @@ -8,9 +8,10 @@ import pytest -from tests.models.embedding.utils import check_embeddings_close, matryoshka_fy from vllm import PoolingParams +from ...utils import check_embeddings_close, matryoshka_fy + SCORING_MODELS = [ "jinaai/jina-reranker-v2-base-multilingual", # Roberta ] diff --git a/tests/models/embedding/language/test_scoring.py b/tests/models/language/pooling/test_scoring.py similarity index 100% rename from tests/models/embedding/language/test_scoring.py rename to tests/models/language/pooling/test_scoring.py diff --git a/tests/models/embedding/language/test_snowflake_arctic_embed.py b/tests/models/language/pooling/test_snowflake_arctic_embed.py similarity index 97% rename from tests/models/embedding/language/test_snowflake_arctic_embed.py rename to tests/models/language/pooling/test_snowflake_arctic_embed.py index 2b884fceec8..81abc0e9e93 100644 --- a/tests/models/embedding/language/test_snowflake_arctic_embed.py +++ b/tests/models/language/pooling/test_snowflake_arctic_embed.py @@ -5,9 +5,7 @@ """ import pytest -from tests.models.embedding.utils import EmbedModelInfo - -from ..utils import check_embeddings_close +from ...utils import EmbedModelInfo, check_embeddings_close EMBEDDING_PROMPTS = [ 'what is snowflake?', 'Where can I get the best tacos?', 'The Data Cloud!', diff --git a/tests/models/embedding/language/test_truncation_control.py b/tests/models/language/pooling/test_truncation_control.py similarity index 100% rename from tests/models/embedding/language/test_truncation_control.py rename to tests/models/language/pooling/test_truncation_control.py diff --git a/tests/models/decoder_only/vision_language/__init__.py b/tests/models/multimodal/generation/__init__.py similarity index 100% rename from tests/models/decoder_only/vision_language/__init__.py rename to tests/models/multimodal/generation/__init__.py diff --git a/tests/models/decoder_only/vision_language/test_awq.py b/tests/models/multimodal/generation/test_awq.py similarity index 100% rename from tests/models/decoder_only/vision_language/test_awq.py rename to tests/models/multimodal/generation/test_awq.py diff --git a/tests/models/encoder_decoder/vision_language/test_broadcast.py b/tests/models/multimodal/generation/test_broadcast.py similarity index 100% rename from tests/models/encoder_decoder/vision_language/test_broadcast.py rename to tests/models/multimodal/generation/test_broadcast.py diff --git a/tests/models/decoder_only/vision_language/test_models.py b/tests/models/multimodal/generation/test_common.py similarity index 100% rename from tests/models/decoder_only/vision_language/test_models.py rename to tests/models/multimodal/generation/test_common.py diff --git a/tests/models/encoder_decoder/vision_language/test_florence2.py b/tests/models/multimodal/generation/test_florence2.py similarity index 100% rename from tests/models/encoder_decoder/vision_language/test_florence2.py rename to tests/models/multimodal/generation/test_florence2.py diff --git a/tests/models/decoder_only/audio_language/test_granite_speech.py b/tests/models/multimodal/generation/test_granite_speech.py similarity index 100% rename from tests/models/decoder_only/audio_language/test_granite_speech.py rename to tests/models/multimodal/generation/test_granite_speech.py diff --git a/tests/models/decoder_only/vision_language/test_interleaved.py b/tests/models/multimodal/generation/test_interleaved.py similarity index 99% rename from tests/models/decoder_only/vision_language/test_interleaved.py rename to tests/models/multimodal/generation/test_interleaved.py index 8804497ae61..92c8155fe1e 100644 --- a/tests/models/decoder_only/vision_language/test_interleaved.py +++ b/tests/models/multimodal/generation/test_interleaved.py @@ -16,6 +16,7 @@ def base_prompt(modalities_str: str) -> str: NONINTERLEAVED_PROMPT = base_prompt("