from sir_llm.kv_cache import StartRecentKVCache


def enable_streaming_llm(model, start_size, recent_size, token_entropy_size):
    """Patch ``model`` for its architecture and build a ``StartRecentKVCache``.

    The architecture is chosen by substring match against
    ``model.config.model_type``, tested in this order: ``llama``, ``phi3``,
    ``mpt``, ``gpt_neox``, ``falcon``. For every architecture except ``mpt``
    the matching ``enable_*_pos_shift_attention`` helper is imported lazily
    and called with ``model``; no such helper is called for ``mpt``.

    Args:
        model: Object exposing ``config.model_type``; it is handed to the
            architecture-specific patch helper when there is one.
        start_size: Forwarded unchanged to ``StartRecentKVCache``.
        recent_size: Forwarded unchanged to ``StartRecentKVCache``.
        token_entropy_size: Forwarded unchanged to ``StartRecentKVCache``.

    Returns:
        The newly constructed ``StartRecentKVCache``.

    Raises:
        ValueError: If ``model.config.model_type`` contains none of the
            recognised architecture names.
    """
    arch = model.config.model_type

    # Each branch records the (k_seq_dim, v_seq_dim) pair for the cache and,
    # where one exists, the helper that patches the model's attention.
    # NOTE(review): the dims are presumably the sequence axes of the cached
    # key/value tensors -- confirm against StartRecentKVCache.
    if "llama" in arch:
        from sir_llm.pos_shift.modify_llama import (
            enable_llama_pos_shift_attention as apply_pos_shift,
        )

        seq_dims = (2, 2)
    elif "phi3" in arch:
        from sir_llm.pos_shift.modify_phi3 import (
            enable_phi3_pos_shift_attention as apply_pos_shift,
        )

        seq_dims = (2, 2)
    elif "mpt" in arch:
        # The only branch without a patch helper, and the only one whose key
        # and value dims differ.
        apply_pos_shift = None
        seq_dims = (3, 2)
    elif "gpt_neox" in arch:
        from sir_llm.pos_shift.modify_gpt_neox import (
            enable_gpt_neox_pos_shift_attention as apply_pos_shift,
        )

        seq_dims = (2, 2)
    elif "falcon" in arch:
        from sir_llm.pos_shift.modify_falcon import (
            enable_falcon_pos_shift_attention as apply_pos_shift,
        )

        seq_dims = (1, 1)
    else:
        raise ValueError(f"got {model.config.model_type}")

    if apply_pos_shift is not None:
        apply_pos_shift(model)

    key_dim, value_dim = seq_dims
    return StartRecentKVCache(
        start_size=start_size,
        recent_size=recent_size,
        token_entropy_size=token_entropy_size,
        k_seq_dim=key_dim,
        v_seq_dim=value_dim,
    )
