|
8 | 8 | # Documentation
|
9 | 9 | # label(str): the name of the test. emoji allowed.
|
10 | 10 | # fast_check(bool): whether to run this test on each commit on the fastcheck pipeline.
|
| 11 | +# torch_nightly(bool): whether to run this test on the vllm-against-torch-nightly pipeline. |
11 | 12 | # fast_check_only(bool): run this test on the fastcheck pipeline only
|
12 | 13 | # optional(bool): never run this test by default (i.e. it needs to be unblocked manually) unless it's a scheduled nightly run.
|
13 | 14 | # command(str): the single command to run for tests. incompatible with commands.
|
|
70 | 71 | - label: Basic Correctness Test # 30min
|
71 | 72 | #mirror_hardwares: [amd]
|
72 | 73 | fast_check: true
|
| 74 | + torch_nightly: true |
73 | 75 | source_file_dependencies:
|
74 | 76 | - vllm/
|
75 | 77 | - tests/basic_correctness/test_basic_correctness
|
@@ -104,6 +106,7 @@ steps:
|
104 | 106 | - label: Entrypoints Test # 40min
|
105 | 107 | working_dir: "/vllm-workspace/tests"
|
106 | 108 | fast_check: true
|
| 109 | + torch_nightly: true |
107 | 110 | #mirror_hardwares: [amd]
|
108 | 111 | source_file_dependencies:
|
109 | 112 | - vllm/
|
@@ -205,6 +208,8 @@ steps:
|
205 | 208 | - pytest -v -s v1/sample
|
206 | 209 | - pytest -v -s v1/worker
|
207 | 210 | - pytest -v -s v1/structured_output
|
| 211 | + - pytest -v -s v1/spec_decode |
| 212 | + - pytest -v -s v1/test_serial_utils.py |
208 | 213 | - pytest -v -s v1/test_stats.py
|
209 | 214 | - pytest -v -s v1/test_utils.py
|
210 | 215 | - pytest -v -s v1/test_oracle.py
|
@@ -312,15 +317,46 @@ steps:
|
312 | 317 | commands:
|
313 | 318 | - pytest -v -s compile/test_full_graph.py
|
314 | 319 |
|
315 |
| -- label: Kernels Test %N # 1h each |
316 |
| - # mirror_hardwares: [amd] |
| 320 | +- label: Kernels Core Operation Test |
317 | 321 | source_file_dependencies:
|
318 | 322 | - csrc/
|
| 323 | + - tests/kernels/core |
| 324 | + commands: |
| 325 | + - pytest -v -s kernels/core |
| 326 | + |
| 327 | +- label: Kernels Attention Test %N |
| 328 | + source_file_dependencies: |
| 329 | + - csrc/attention/ |
319 | 330 | - vllm/attention
|
320 |
| - - tests/kernels |
| 331 | + - vllm/v1/attention |
| 332 | + - tests/kernels/attention |
321 | 333 | commands:
|
322 |
| - - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT |
323 |
| - parallelism: 4 |
| 334 | + - pytest -v -s kernels/attention --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT |
| 335 | + parallelism: 2 |
| 336 | + |
| 337 | +- label: Kernels Quantization Test %N |
| 338 | + source_file_dependencies: |
| 339 | + - csrc/quantization/ |
| 340 | + - vllm/model_executor/layers/quantization |
| 341 | + - tests/kernels/quantization |
| 342 | + commands: |
| 343 | + - pytest -v -s kernels/quantization --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT |
| 344 | + parallelism: 2 |
| 345 | + |
| 346 | +- label: Kernels MoE Test |
| 347 | + source_file_dependencies: |
| 348 | + - csrc/moe/ |
| 349 | + - tests/kernels/moe |
| 350 | + - vllm/model_executor/layers/fused_moe/ |
| 351 | + commands: |
| 352 | + - pytest -v -s kernels/moe |
| 353 | + |
| 354 | +- label: Kernels Mamba Test |
| 355 | + source_file_dependencies: |
| 356 | + - csrc/mamba/ |
| 357 | + - tests/kernels/mamba |
| 358 | + commands: |
| 359 | + - pytest -v -s kernels/mamba |
324 | 360 |
|
325 | 361 | - label: Tensorizer Test # 11min
|
326 | 362 | # mirror_hardwares: [amd]
|
|
0 commit comments