from vllm.config import (CacheConfig, DeviceConfig, LoRAConfig, ModelConfig,
                         ParallelConfig, SchedulerConfig, TokenizerPoolConfig,
                         VisionLanguageConfig)
from typing import TYPE_CHECKING, ClassVar, Optional, Union, Tuple
from vllm.utils import str_to_int_tuple

def __init__(
    self,
    max_num_batched_tokens: Optional[int],
    max_num_seqs: int,
    max_model_len: int,
    use_v2_block_manager: bool = False,
    num_lookahead_slots: int = 0,
    delay_factor: float = 0.0,
    enable_chunked_prefill: bool = False,
    model_name: Optional[str] = None,
) -> None:
    """Initialize a scheduler config, additionally recording the model name.

    This function is installed as ``SchedulerConfig.__init__`` by this
    module. It copies its arguments onto ``self`` and finishes by calling
    ``self._verify_args()``.

    Args:
        max_num_batched_tokens: Stored as ``self.max_num_batched_tokens``.
            When ``None``, ``max(max_model_len, 2048)`` is used instead.
        max_num_seqs: Stored as ``self.max_num_seqs``.
        max_model_len: Stored as ``self.max_model_len``; also feeds the
            default for ``max_num_batched_tokens``.
        use_v2_block_manager: Stored as ``self.use_v2_block_manager``.
        num_lookahead_slots: Stored as ``self.num_lookahead_slots``.
        delay_factor: Stored as ``self.delay_factor``.
        enable_chunked_prefill: Stored under the differently named
            attribute ``self.chunked_prefill_enabled``.
        model_name: Stored as ``self.model_name``; may be ``None``.
    """
    if max_num_batched_tokens is not None:
        self.max_num_batched_tokens = max_num_batched_tokens
    else:
        # If max_model_len is too short, use 2048 as the default value for
        # higher throughput.
        self.max_num_batched_tokens = max(max_model_len, 2048)
    self.max_num_seqs = max_num_seqs
    self.max_model_len = max_model_len
    self.use_v2_block_manager = use_v2_block_manager
    self.num_lookahead_slots = num_lookahead_slots
    self.delay_factor = delay_factor
    self.chunked_prefill_enabled = enable_chunked_prefill
    self.model_name = model_name
    # Validation runs last so it sees every attribute assigned above.
    self._verify_args()
# Install the initializer defined above on SchedulerConfig.
SchedulerConfig.__init__ = __init__

def create_engine_configs(
    self,
) -> Tuple[ModelConfig, CacheConfig, ParallelConfig, SchedulerConfig,
           DeviceConfig, Optional[LoRAConfig],
           Optional[VisionLanguageConfig]]:
    """Build the engine configuration objects from this object's attributes.

    This function is installed as ``EngineArgs.create_engine_configs`` by
    this module. Compared with a plain scheduler config construction it
    additionally forwards ``self.model`` as ``model_name``.

    Returns:
        A 7-tuple ``(model_config, cache_config, parallel_config,
        scheduler_config, device_config, lora_config,
        vision_language_config)``. ``lora_config`` is ``None`` unless
        ``self.enable_lora`` is truthy, and ``vision_language_config`` is
        ``None`` unless ``self.image_input_type`` is truthy.

    Raises:
        ValueError: If ``self.image_input_type`` is set but
            ``image_token_id`` is ``None`` or ``image_input_shape`` /
            ``image_feature_size`` are empty.
    """
    device_config = DeviceConfig(self.device)
    model_config = ModelConfig(
        self.model, self.tokenizer, self.tokenizer_mode,
        self.trust_remote_code, self.download_dir, self.load_format,
        self.dtype, self.seed, self.revision, self.code_revision,
        self.tokenizer_revision, self.max_model_len, self.quantization,
        self.enforce_eager, self.max_context_len_to_capture,
        self.max_logprobs)
    # The sliding window is read from the model config, not from self.
    cache_config = CacheConfig(self.block_size,
                               self.gpu_memory_utilization,
                               self.swap_space, self.kv_cache_dtype,
                               self.forced_num_gpu_blocks,
                               model_config.get_sliding_window(),
                               self.enable_prefix_caching)
    tokenizer_pool_config = TokenizerPoolConfig.create_config(
        self.tokenizer_pool_size,
        self.tokenizer_pool_type,
        self.tokenizer_pool_extra_config,
    )
    parallel_config = ParallelConfig(
        self.pipeline_parallel_size, self.tensor_parallel_size,
        self.worker_use_ray, self.max_parallel_loading_workers,
        self.disable_custom_all_reduce, tokenizer_pool_config,
        self.ray_workers_use_nsight)
    # Uses model_config.max_model_len rather than self.max_model_len, so
    # whatever value ModelConfig ended up with is what the scheduler sees.
    scheduler_config = SchedulerConfig(
        self.max_num_batched_tokens,
        self.max_num_seqs,
        model_config.max_model_len,
        self.use_v2_block_manager,
        num_lookahead_slots=self.num_lookahead_slots,
        delay_factor=self.scheduler_delay_factor,
        enable_chunked_prefill=self.enable_chunked_prefill,
        model_name=self.model,
    )

    if self.enable_lora:
        # A missing, zero or negative max_cpu_loras is passed on as None.
        if self.max_cpu_loras and self.max_cpu_loras > 0:
            max_cpu_loras = self.max_cpu_loras
        else:
            max_cpu_loras = None
        lora_config = LoRAConfig(
            max_lora_rank=self.max_lora_rank,
            max_loras=self.max_loras,
            lora_extra_vocab_size=self.lora_extra_vocab_size,
            lora_dtype=self.lora_dtype,
            max_cpu_loras=max_cpu_loras)
    else:
        lora_config = None

    if self.image_input_type:
        # image_token_id is compared against None explicitly: a truthiness
        # test would wrongly reject a token id of 0. The shape string and
        # feature size keep the truthiness test because empty/zero values
        # are not usable there.
        if (self.image_token_id is None or not self.image_input_shape
                or not self.image_feature_size):
            raise ValueError(
                'Specify `image_token_id`, `image_input_shape` and '
                '`image_feature_size` together with `image_input_type`.')
        vision_language_config = VisionLanguageConfig(
            image_input_type=VisionLanguageConfig.
            get_image_input_enum_type(self.image_input_type),
            image_token_id=self.image_token_id,
            image_input_shape=str_to_int_tuple(self.image_input_shape),
            image_feature_size=self.image_feature_size,
        )
    else:
        vision_language_config = None

    return (model_config, cache_config, parallel_config, scheduler_config,
            device_config, lora_config, vision_language_config)
from vllm.engine.arg_utils import EngineArgs
# Install the config builder defined above on EngineArgs.
EngineArgs.create_engine_configs = create_engine_configs