@@ -263,6 +263,10 @@ class ModelConfig:
263
263
the model name will be the same as `model`.
264
264
limit_mm_per_prompt: Maximum number of data items per modality
265
265
per prompt. Only applicable for multimodal models.
266
+ mm_processor_kwargs: Overrides for the multi-modal processor obtained
267
+ from `AutoProcessor.from_pretrained`.
268
+ disable_mm_preprocessor_cache: If True, disable caching of the
269
+ processed multi-modal inputs.
266
270
use_async_output_proc: Whether to use async output processor.
267
271
Defaults to True.
268
272
config_format: The config format which shall be loaded.
@@ -273,10 +277,6 @@ class ModelConfig:
273
277
hf_overrides: If a dictionary, contains arguments to be forwarded to the
274
278
HuggingFace config. If a callable, it is called to update the
275
279
HuggingFace config.
276
- mm_processor_kwargs: Arguments to be forwarded to the model's processor
277
- for multi-modal data, e.g., image processor.
278
- disable_mm_preprocessor_cache: If true, then disables caching of the
279
- multi-modal preprocessor/mapper. (not recommended)
280
280
override_neuron_config: Initialize non default neuron config or
281
281
override default neuron config that are specific to Neuron devices,
282
282
this argument will be used to configure the neuron config that
@@ -320,7 +320,6 @@ def compute_hash(self) -> str:
320
320
factors .append (self .max_logprobs )
321
321
factors .append (self .disable_sliding_window )
322
322
factors .append (self .trust_remote_code )
323
- factors .append (self .mm_processor_kwargs )
324
323
factors .append (self .generation_config )
325
324
factors .append (self .model_impl )
326
325
factors .append (self .override_generation_config )
@@ -359,12 +358,12 @@ def __init__(
359
358
skip_tokenizer_init : bool = False ,
360
359
served_model_name : Optional [Union [str , list [str ]]] = None ,
361
360
limit_mm_per_prompt : Optional [dict [str , int ]] = None ,
361
+ mm_processor_kwargs : Optional [dict [str , Any ]] = None ,
362
+ disable_mm_preprocessor_cache : bool = False ,
362
363
use_async_output_proc : bool = True ,
363
364
config_format : ConfigFormat = ConfigFormat .AUTO ,
364
365
hf_token : Optional [Union [bool , str ]] = None ,
365
366
hf_overrides : Optional [HfOverrides ] = None ,
366
- mm_processor_kwargs : Optional [dict [str , Any ]] = None ,
367
- disable_mm_preprocessor_cache : bool = False ,
368
367
override_neuron_config : Optional [dict [str , Any ]] = None ,
369
368
override_pooler_config : Optional ["PoolerConfig" ] = None ,
370
369
logits_processor_pattern : Optional [str ] = None ,
@@ -469,8 +468,6 @@ def __init__(
469
468
self .model , hf_token = hf_token , revision = revision )
470
469
self .dtype = _get_and_verify_dtype (self .hf_config , dtype )
471
470
self .use_async_output_proc = use_async_output_proc
472
- self .mm_processor_kwargs = mm_processor_kwargs
473
- self .disable_mm_preprocessor_cache = disable_mm_preprocessor_cache
474
471
475
472
# Set enforce_eager to False if the value is unset.
476
473
if self .enforce_eager is None :
@@ -515,7 +512,10 @@ def __init__(
515
512
self .served_model_name = get_served_model_name (model ,
516
513
served_model_name )
517
514
self .multimodal_config = self ._init_multimodal_config (
518
- limit_mm_per_prompt )
515
+ limit_mm_per_prompt = limit_mm_per_prompt ,
516
+ mm_processor_kwargs = mm_processor_kwargs ,
517
+ disable_mm_preprocessor_cache = disable_mm_preprocessor_cache ,
518
+ )
519
519
if not self .skip_tokenizer_init :
520
520
self ._verify_tokenizer_mode ()
521
521
@@ -581,14 +581,27 @@ def maybe_pull_model_tokenizer_for_s3(self, model: str,
581
581
self .tokenizer = s3_tokenizer .dir
582
582
583
583
def _init_multimodal_config (
584
- self , limit_mm_per_prompt : Optional [dict [str , int ]]
584
+ self ,
585
+ limit_mm_per_prompt : Optional [dict [str , int ]],
586
+ mm_processor_kwargs : Optional [dict [str , Any ]],
587
+ disable_mm_preprocessor_cache : bool ,
585
588
) -> Optional ["MultiModalConfig" ]:
586
589
if self .registry .is_multimodal_model (self .architectures ):
587
- return MultiModalConfig (limit_per_prompt = limit_mm_per_prompt or {})
590
+ return MultiModalConfig (
591
+ limit_per_prompt = limit_mm_per_prompt or {},
592
+ mm_processor_kwargs = mm_processor_kwargs or {},
593
+ disable_mm_preprocessor_cache = disable_mm_preprocessor_cache ,
594
+ )
588
595
589
596
if limit_mm_per_prompt :
590
597
raise ValueError ("`limit_mm_per_prompt` is only supported for "
591
598
"multimodal models." )
599
+ if mm_processor_kwargs :
600
+ raise ValueError ("`mm_processor_kwargs` is only supported for "
601
+ "multimodal models." )
602
+ if disable_mm_preprocessor_cache :
603
+ raise ValueError ("`disable_mm_preprocessor_cache` is only "
604
+ "supported for multimodal models." )
592
605
593
606
return None
594
607
@@ -2776,7 +2789,23 @@ class MultiModalConfig:
2776
2789
Defaults to 1 (V0) or 999 (V1) for each modality.
2777
2790
2778
2791
For example, to allow up to 16 images and 2 videos per prompt:
2779
- ``{"images": 16, "videos": 2}``
2792
+ :code:`{"images": 16, "videos": 2}`
2793
+ """
2794
+
2795
+ mm_processor_kwargs : Optional [dict [str , object ]] = None
2796
+ """
2797
+ Overrides for the multi-modal processor obtained from
2798
+ :meth:`transformers.AutoProcessor.from_pretrained`.
2799
+
2800
+ The available overrides depend on the model that is being run.
2801
+
2802
+ For example, for Phi-3-Vision:
2803
+ :code:`{"num_crops": 4}`.
2804
+ """
2805
+
2806
+ disable_mm_preprocessor_cache : bool = False
2807
+ """
2808
+ If :code:`True`, disable caching of the processed multi-modal inputs.
2780
2809
"""
2781
2810
2782
2811
def compute_hash (self ) -> str :
@@ -4080,8 +4109,6 @@ def __str__(self):
4080
4109
f"enable_prefix_caching={ self .cache_config .enable_prefix_caching } , "
4081
4110
f"chunked_prefill_enabled={ self .scheduler_config .chunked_prefill_enabled } , " # noqa
4082
4111
f"use_async_output_proc={ self .model_config .use_async_output_proc } , "
4083
- f"disable_mm_preprocessor_cache={ self .model_config .disable_mm_preprocessor_cache !r} , " # noqa
4084
- f"mm_processor_kwargs={ self .model_config .mm_processor_kwargs } , "
4085
4112
f"pooler_config={ self .model_config .pooler_config !r} , "
4086
4113
f"compilation_config={ self .compilation_config !r} " )
4087
4114
0 commit comments