
Commit 26cf9d6

DarkLight1337 authored and wuisawesome committed
[Bugfix] Fix standard models tests (vllm-project#17217)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent 328cad9 commit 26cf9d6

File tree

4 files changed (+69 -60 lines)


docs/source/models/supported_models.md

+1 -1
@@ -322,7 +322,7 @@ See [this page](#generative-models) for more information on how to use generativ
   * ✅︎
 - * `GemmaForCausalLM`
   * Gemma
-  * `google/gemma-2b`, `google/gemma-7b`, etc.
+  * `google/gemma-2b`, `google/gemma-1.1-2b-it`, etc.
   * ✅︎
   * ✅︎
 - * `Gemma2ForCausalLM`
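
For reference, the renamed default checkpoint in this row loads through vLLM's offline API like any other text-generation model. A minimal sketch (the prompt and sampling settings are illustrative, not part of this commit):

```python
from vllm import LLM, SamplingParams

# Load the instruction-tuned Gemma checkpoint now listed in the docs row above.
llm = LLM(model="google/gemma-1.1-2b-it")

# Greedy decoding keeps the output deterministic, mirroring the tests below.
params = SamplingParams(temperature=0.0, max_tokens=32)
outputs = llm.generate(["What is the capital of France?"], params)
print(outputs[0].outputs[0].text)
```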

tests/distributed/test_pipeline_parallel.py

+3 -3
@@ -161,12 +161,12 @@ def iter_params(self, model_id: str):
     "deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(),
     "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct": PPTestSettings.fast(),
     "tiiuae/falcon-7b": PPTestSettings.fast(),
-    "google/gemma-2b": PPTestSettings.fast(),
+    "google/gemma-1.1-2b-it": PPTestSettings.fast(),
     "google/gemma-2-9b": PPTestSettings.fast(),
     "gpt2": PPTestSettings.fast(),
     "bigcode/starcoder": PPTestSettings.fast(),
     "EleutherAI/gpt-j-6b": PPTestSettings.fast(),
-    "EleutherAI/pythia-12b": PPTestSettings.fast(),
+    "EleutherAI/pythia-1.4b": PPTestSettings.fast(),
     "ibm/PowerLM-3b": PPTestSettings.fast(),
     "ibm/PowerMoE-3b": PPTestSettings.fast(),
     # Uses Llama
@@ -195,7 +195,7 @@ def iter_params(self, model_id: str):
     "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
     "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(multi_node_only=True, load_format="dummy"),  # noqa: E501
     "Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
-    "Qwen/Qwen2-7B-Instruct": PPTestSettings.fast(),
+    "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
     "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
     "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(),
     "bigcode/starcoder2-3b": PPTestSettings.fast(),

tests/models/decoder_only/language/test_models.py

+41 -31
@@ -9,6 +9,7 @@

 from vllm.platforms import current_platform

+from ....utils import large_gpu_mark
 from ...registry import HF_EXAMPLE_MODELS
 from ...utils import check_logprobs_close

@@ -26,71 +27,78 @@
 AITER_MODEL_LIST = [
     "meta-llama/Llama-3.2-1B-Instruct",
     "openbmb/MiniCPM3-4B",
-    "Qwen/Qwen-7B",
+    "Qwen/Qwen-7B-Chat",
     "Qwen/Qwen2.5-0.5B-Instruct",
     "ehristoforu/Falcon3-MoE-2x7B-Insruct",
 ]


 # @maybe_test_rocm_aiter
 @pytest.mark.parametrize(
-    "model_arch",
+    "model",
     [
         pytest.param(
-            "BloomForCausalLM",  # testing alibi slopes
+            "bigscience/bloom-560m",  # bloom - testing alibi slopes
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "GPT2LMHeadModel",  # gpt2
+            "openai-community/gpt2",  # gpt2
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("GPTJForCausalLM"),
-        pytest.param("GPTBigCodeForCausalLM"),
-        pytest.param("GPTNeoXForCausalLM"),
+        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
+        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
+        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
         pytest.param(
-            "GemmaForCausalLM",  # gemma
+            "google/gemma-1.1-2b-it",  # gemma
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
-        pytest.param("GlmForCausalLM"),
         pytest.param(
-            "LlamaForCausalLM",
+            "THUDM/chatglm3-6b",  # chatglm (text-only)
+        ),
+        pytest.param(
+            "meta-llama/Llama-3.2-1B-Instruct",  # llama
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "MiniCPM3ForCausalLM",
+            "openbmb/MiniCPM3-4B",
             # fused_moe not supported on CPU
-            marks=[pytest.mark.core_model],
+            marks=[pytest.mark.core_model,
+                   large_gpu_mark(min_gb=32)],
         ),
         pytest.param(
-            "OPTForCausalLM",
+            "facebook/opt-125m",  # opt
             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param(
-            "PhiForCausalLM",
+            "microsoft/phi-2",  # phi
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("QWenLMHeadModel", ),
         pytest.param(
-            "Qwen2ForCausalLM",
+            "Qwen/Qwen-7B-Chat",  # qwen (text-only)
+        ),
+        pytest.param(
+            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
             marks=[pytest.mark.core_model],
         ),
-        pytest.param("StableLmForCausalLM"),
-        pytest.param("Starcoder2ForCausalLM"),
+        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
+        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
         pytest.param(
-            "MixtralForCausalLM",
-            marks=[pytest.mark.cpu_model],
+            "ehristoforu/Falcon3-MoE-2x7B-Insruct",  # mixtral
+            marks=[pytest.mark.cpu_model,
+                   large_gpu_mark(min_gb=48)],
         )
     ])
-@pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
-def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
-                dtype: str, max_tokens: int, num_logprobs: int,
-                use_rocm_aiter: bool, monkeypatch) -> None:
+def test_models(hf_runner, vllm_runner, example_prompts, model: str,
+                max_tokens: int, num_logprobs: int, use_rocm_aiter: bool,
+                monkeypatch) -> None:

-    model = HF_EXAMPLE_MODELS.get_hf_info(model_arch).default
+    model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
+    model_info.check_available_online(on_fail="skip")
+    model_info.check_transformers_version(on_fail="skip")

     if model in REQUIRES_V0:
         monkeypatch.setenv("VLLM_USE_V1", "0")
@@ -104,15 +112,17 @@ def test_models(hf_runner, vllm_runner, example_prompts, model_arch: str,
         # in parts of the operators
         pytest.skip(f"Skipping '{model}' model test with AITER kernel.")

-    with hf_runner(model, dtype=dtype) as hf_model:
-        if model.startswith("THUDM/chatglm3"):
-            hf_model.model.get_output_embeddings = lambda: \
-                hf_model.model.transformer.output_layer
-
+    with hf_runner(model) as hf_model:
         hf_outputs = hf_model.generate_greedy_logprobs_limit(
             example_prompts, max_tokens, num_logprobs)

-    with vllm_runner(model, dtype=dtype) as vllm_model:
+    with vllm_runner(
+            model,
+            tokenizer_name=model_info.tokenizer or model,
+            tokenizer_mode=model_info.tokenizer_mode,
+            trust_remote_code=model_info.trust_remote_code,
+            max_num_seqs=2,
+    ) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens, num_logprobs)

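The test is now parametrized by checkpoint name rather than architecture name, so the registry lookup flips from `get_hf_info(model_arch).default` to `find_hf_info(model)`, which has to search entries by their default or extra checkpoints. A hedged sketch of what that lookup implies (`find_hf_info`, `default`, and `extras` appear in this commit; the dict-of-entries shape is an assumption for illustration):

```python
# Sketch only: resolve a registry entry from a checkpoint name.
def find_hf_info(hf_models: dict[str, "_HfExamplesInfo"],
                 model_id: str) -> "_HfExamplesInfo":
    for info in hf_models.values():
        # Match either the default checkpoint or any named extra.
        if info.default == model_id or model_id in info.extras.values():
            return info
    raise ValueError(f"No example-model record found for {model_id!r}")
```

The `check_available_online(on_fail="skip")` and `check_transformers_version(on_fail="skip")` calls then let gated repos and incompatible `transformers` versions skip cleanly instead of failing the run.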

tests/models/registry.py

+24 -25
@@ -121,10 +121,11 @@ def check_available_online(
     "BaichuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan2-7B-chat",
                                            trust_remote_code=True),
     "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B"),
-    "BloomForCausalLM": _HfExamplesInfo("bigscience/bloomz-1b1"),
+    "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
+                                        {"1b": "bigscience/bloomz-1b1"}),
     "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
                                     trust_remote_code=True,
-                                    max_transformers_version="4.51.1"),
+                                    max_transformers_version="4.48"),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
                                                        trust_remote_code=True),
     "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01",
@@ -142,24 +143,26 @@ def check_available_online(
     "ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),  # noqa: E501
     "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),  # noqa: E501
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
-    "GemmaForCausalLM": _HfExamplesInfo("google/gemma-2b"),
+    "GemmaForCausalLM": _HfExamplesInfo("google/gemma-1.1-2b-it"),
     "Gemma2ForCausalLM": _HfExamplesInfo("google/gemma-2-9b"),
-    "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it",
-                                         min_transformers_version="4.50"),
+    "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
     "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
     "Glm4ForCausalLM": _HfExamplesInfo(
         "THUDM/GLM-4-32B-0414",
         is_available_online=False,
         min_transformers_version="4.52.dev0"
     ),
-    "GPT2LMHeadModel": _HfExamplesInfo("gpt2"),
-    "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder"),
-    "GPTJForCausalLM": _HfExamplesInfo("EleutherAI/gpt-j-6b"),
-    "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-160m"),
+    "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
+                                       {"alias": "gpt2"}),
+    "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder",
+                                             {"tiny": "bigcode/tiny_starcoder_py"}),  # noqa: E501
+    "GPTJForCausalLM": _HfExamplesInfo("Milos/slovak-gpt-j-405M",
+                                       {"6b": "EleutherAI/gpt-j-6b"}),
+    "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
+                                          {"1b": "EleutherAI/pythia-1.4b"}),
     "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
     "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
-    "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts",  # noqa: E501
-                                                   min_transformers_version="4.49"),  # noqa: E501
+    "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"),  # noqa: E501
     "Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
                                              trust_remote_code=True),
     "InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
@@ -187,15 +190,17 @@ def check_available_online(
     "MiniMaxText01ForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-Text-01",
                                                 trust_remote_code=True),
     "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
-    "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1"),  # noqa: E501
+    "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1",  # noqa: E501
+                                          {"falcon3": "ehristoforu/Falcon3-MoE-2x7B-Insruct"}),  # noqa: E501
     "QuantMixtralForCausalLM": _HfExamplesInfo("mistral-community/Mixtral-8x22B-v0.1-AWQ"),  # noqa: E501
     "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False),
     "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"),
     "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"),
     "OlmoForCausalLM": _HfExamplesInfo("allenai/OLMo-1B-hf"),
     "Olmo2ForCausalLM": _HfExamplesInfo("shanearora/OLMo-7B-1124-hf"),
     "OlmoeForCausalLM": _HfExamplesInfo("allenai/OLMoE-1B-7B-0924-Instruct"),
-    "OPTForCausalLM": _HfExamplesInfo("facebook/opt-iml-max-1.3b"),
+    "OPTForCausalLM": _HfExamplesInfo("facebook/opt-125m",
+                                      {"1b": "facebook/opt-iml-max-1.3b"}),
     "OrionForCausalLM": _HfExamplesInfo("OrionStarAI/Orion-14B-Chat",
                                         trust_remote_code=True),
     "PersimmonForCausalLM": _HfExamplesInfo("adept/persimmon-8b-chat"),
@@ -209,8 +214,8 @@ def check_available_online(
                                        trust_remote_code=True),
     "QWenLMHeadModel": _HfExamplesInfo("Qwen/Qwen-7B-Chat",
                                        trust_remote_code=True),
-    "Qwen2ForCausalLM": _HfExamplesInfo("Qwen/Qwen2-7B-Instruct",
-                                        extras={"2.5": "Qwen/Qwen2.5-7B-Instruct"}),  # noqa: E501
+    "Qwen2ForCausalLM": _HfExamplesInfo("Qwen/Qwen2-0.5B-Instruct",
+                                        extras={"2.5": "Qwen/Qwen2.5-0.5B-Instruct"}),  # noqa: E501
     "Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"),
     "Qwen3ForCausalLM": _HfExamplesInfo(
         "Qwen/Qwen3-8B",
@@ -236,8 +241,7 @@ def check_available_online(
     "XverseForCausalLM": _HfExamplesInfo("xverse/XVERSE-7B-Chat",
                                          is_available_online=False,
                                          trust_remote_code=True),
-    "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct",
-                                         min_transformers_version="4.49"),
+    "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct"),
     # [Encoder-decoder]
     "BartModel": _HfExamplesInfo("facebook/bart-base"),
     "BartForConditionalGeneration": _HfExamplesInfo("facebook/bart-large-cnn"),
@@ -280,9 +284,7 @@ def check_available_online(
     "BertForSequenceClassification": _HfExamplesInfo("cross-encoder/ms-marco-MiniLM-L-6-v2"),  # noqa: E501
     "RobertaForSequenceClassification": _HfExamplesInfo("cross-encoder/quora-roberta-base"),  # noqa: E501
     "XLMRobertaForSequenceClassification": _HfExamplesInfo("BAAI/bge-reranker-v2-m3"),  # noqa: E501
-    "ModernBertForSequenceClassification":
-        _HfExamplesInfo("Alibaba-NLP/gte-reranker-modernbert-base",
-                        min_transformers_version="4.49"),
+    "ModernBertForSequenceClassification": _HfExamplesInfo("Alibaba-NLP/gte-reranker-modernbert-base"),  # noqa: E501
 }

 _MULTIMODAL_EXAMPLE_MODELS = {
@@ -298,8 +300,7 @@ def check_available_online(
                                          transformers_version_reason="HF model is not compatible.",  # noqa: E501
                                          hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}),  # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
-    "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it",
-                                                      min_transformers_version="4.50"),
+    "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
     "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
@@ -335,7 +336,6 @@ def check_available_online(
                                            extras={"2.6": "openbmb/MiniCPM-V-2_6"},  # noqa: E501
                                            trust_remote_code=True),
     "Mistral3ForConditionalGeneration": _HfExamplesInfo("mistralai/Mistral-Small-3.1-24B-Instruct-2503",  # noqa: E501
-                                                        min_transformers_version="4.50",  # noqa: E501
                                                         extras={"fp8": "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"}),  # noqa: E501
     "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924",
                                         max_transformers_version="4.48",
@@ -361,8 +361,7 @@ def check_available_online(
                                        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]}),  # noqa: E501
     "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"),  # noqa: E501
     "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),  # noqa: E501
-    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct",  # noqa: E501
-                                                          min_transformers_version="4.49"),  # noqa: E501
+    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"),  # noqa: E501
     "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B",  # noqa: E501
                                         min_transformers_version="4.52"),  # noqa: E501
     "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
