# Device placement defaults.
DEFAULT_DEVICE = "cuda:0"
DEFAULT_DEVICE_MAP = "auto"

# Root folders for local assets (absolute paths).
PATH_TO_MODEL_FOLDER = "/home/models"
PATH_TO_DATASET_FOLDER = "/home/datasets"

# Text-generation defaults (presumably forwarded to the model's generate
# call -- confirm at the call site).
MAX_NEW_TOKENS = 512
TEMPERATURE = 0.5
# Supported model identifiers in "<org>/<name>" form.
# Families that differ only by parameter count are expanded from a template;
# the resulting order matches the original hand-written list exactly.
MODEL_LIST = [
    "moonshotai/Kimi-VL-A3B-Instruct",
    # Qwen2-VL family
    *(f"Qwen/Qwen2-VL-{size}-Instruct" for size in ("7B", "72B")),
    # Qwen2.5-VL family
    *(f"Qwen/Qwen2.5-VL-{size}-Instruct" for size in ("3B", "7B", "32B", "72B")),
    "microsoft/Phi-4-multimodal-instruct",
    "Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5",
    # InternVL3 family
    *(f"OpenGVLab/InternVL3-{size}" for size in ("1B", "2B", "8B", "14B", "38B", "78B")),
    # LLaVA variants
    "llava-hf/llava-interleave-qwen-7b-hf",
    "llava-hf/llava-onevision-qwen2-7b-ov-hf",
    "mistralai/Pixtral-12B-2409",
    "BiliSakura/RSCCM",
]
# Repository ids of the text models named after T5, BERT and BLEURT
# (presumably used for caption-similarity scoring -- confirm against the
# evaluation code).
BLEURT_MODEL_REPO = "lucadiliello/BLEURT-20"
# Alternative BLEURT checkpoint, kept disabled for reference:
# BLEURT_MODEL_REPO = "Elron/bleurt-large-512"
BERT_MODEL_REPO = "FacebookAI/roberta-large"
T5_MODEL_REPO = "sentence-transformers/sentence-t5-xxl"
# Maps a part name to a 4-tuple of integers covering one 512x512 tile of a
# 1024x1024 grid: (x0, y0, x0 + 512, y0 + 512).
# NOTE(review): this looks like a (left, upper, right, lower) crop box --
# confirm the axis convention against the code that consumes it.
# The key order (part1, part3, part2, part4) is kept as in the original,
# because dict iteration order is observable by callers.
PART_MAPPING = {
    part_name: (x0, y0, x0 + 512, y0 + 512)
    for part_name, (x0, y0) in (
        ("part1", (0, 0)),
        ("part3", (0, 512)),
        ("part2", (512, 0)),
        ("part4", (512, 512)),
    )
}

# Prompt templates for change captioning, keyed by prompting strategy.
#
# Each value is a triple-quoted literal, so it begins with a newline and every
# line keeps its four-space indentation (and any trailing spaces) verbatim.
# The "textual" and "visual" templates contain `{disaster_type}` and
# `{number[...]}` replacement fields; they are presumably filled with
# `str.format(disaster_type=..., number=<mapping>)` -- confirm at the call
# site. The `number` mapping would need the keys "all", "no-damage",
# "minor-damage", "major-damage", "destroyed" and "un-classified".
# NOTE(review): the wording of these prompts is runtime text; editing it
# changes what the models are asked.
PROMPT_TEMPLATES = {
    # No placeholders: a plain instruction with no disaster metadata.
    "naive": """
    Give change description between two satellite images.
    Output answer in a news style with a few sentences using precise phrases separated by commas.
    """,
    # Adds the disaster type, the damage-level definitions and per-level
    # building counts as text.
    "textual": """
    These two satellite images show a {disaster_type} natural disaster. 

    Here is the disaster level descriptions: 
    - Disaster Level 0 (No Damage): Undisturbed. No sign of water, structural or shingle damage, or burn marks.
    - Disaster Level 1 (Minor Damage): Building partially burnt, water surrounding structure, volcanic flow nearby, roof elements missing, or visible cracks.
    - Disaster Level 2 (Major Damage): Partial wall or roof collapse, encroaching volcanic flow, or surrounded by water/mud. 
    - Disaster Level 3 (Destroyed): Scorched, completely collapsed, partially/completely covered with water/mud, or otherwise no longer present.  

    We already know that there are {number[all]} buildings. {number[no-damage]} buildings are no damaged. {number[minor-damage]} buildings are minor damaged, {number[major-damage]} building are major damaged, {number[destroyed]} buildings are destroyed. {number[un-classified]} buildings damage are unknown due to some reasons. 
    Now, describe the changes that occurred between the pre-event and post-event images with the given disaster level descriptions.
    Output answer in a news style with a few sentences using precise phrases separated by commas.
    """,
    # Same as "textual", but each damage level is also tied to a colour
    # (green / blue / orange / red / white) in the prompt text.
    "visual": """
    These two satellite images show a {disaster_type} natural disaster. 

    Here is the disaster level descriptions: 
    - Disaster Level 0 (No Damage): Undisturbed. No sign of water, structural or shingle damage, or burn marks.
    - Disaster Level 1 (Minor Damage): Building partially burnt, water surrounding structure, volcanic flow nearby, roof elements missing, or visible cracks.
    - Disaster Level 2 (Major Damage): Partial wall or roof collapse, encroaching volcanic flow, or surrounded by water/mud. 
    - Disaster Level 3 (Destroyed): Scorched, completely collapsed, partially/completely covered with water/mud, or otherwise no longer present.  

    We already know that there are {number[all]} buildings. {number[no-damage]} buildings are no damaged colored in green. {number[minor-damage]} buildings are minor damaged colored in blue, {number[major-damage]} building are major damaged colored in orange, {number[destroyed]} buildings are destroyed colored in red. {number[un-classified]} buildings damage are unknown due to some reasons colored in white. 
    Now, describe the changes that occurred between the pre-event and post-event images with the given disaster level descriptions.
    Output answer in a news style with a few sentences using precise phrases separated by commas.
    """,
}

# Prompt asking a judge model to pick the best change caption among several
# candidates and to answer as JSON with keys 'best_model_id' and 'reason'.
#
# `{captions}` is the only replacement field. The doubled braces around the
# example response (`{{` / `}}`) render as literal `{` / `}` when the string
# goes through `str.format`, which is presumably how it is filled -- confirm
# at the call site.
EVALUATION_PROMPT_TEMPLATE = """You are a remote sensing expert. 

I will provide you with 2 satellite images of the same area before and after a natural disaster event. 
Your task is to evaluate change captions generated by different vision language models and select the best one.

**Evaluation Criteria:**
1. Accuracy - Correct interpretation of damage patterns and disaster type
2. Completeness - Inclusion of relevant details (structures affected, disaster indicators)
3. Clarity - Clear, concise description without contradictions
4. Adherence to Facts - Consistency with typical disaster damage levels

**Change Captions:**
{captions}

**Output Requirements:**
1. Choose the BEST model based on evaluation criteria
2. Provide a concise reason for your choice
3. Format response as JSON with keys 'best_model_id' and 'reason'

Example Response:
{{
    "best_model_id": "Model_3",
    "reason": "Most accurate damage assessment with specific structural details while maintaining clarity"
}}"""
# Hyperparameters for DeCo, DoLa, and VCD
DECO_ALPHA = 0.2
DECO_THRESHOLD_TOP_P = 0.9
DECO_THRESHOLD_TOP_K = 20
# Default early-exit layer indices: 15 through 24 inclusive.
DECO_EARLY_EXIT_LAYERS_DEFAULT = list(range(15, 25))
# Per-model early-exit layers: from the middle layer (n / 2) up to, but not
# including, the late layer (3 * n / 4), where n is the layer count of the
# underlying LLM. The layer counts below come from the original author's notes.
DECO_EARLY_EXIT_LAYERS_DICT = {
    model_key: list(range(layer_num // 2, (3 * layer_num) // 4))
    for model_key, layer_num in (
        ("QWEN25VL3B", 36),  # LLM: Qwen2.5 3B
        ("QWEN25VL7B", 28),  # LLM: Qwen2.5 7B
        ("QWEN25VL32B", 64),  # LLM: Qwen2.5 32B
        ("QWEN25VL72B", 80),  # LLM: Qwen2.5-VL 72B
        ("INTERNVL3_1B", 24),  # LLM: Qwen2.5 0.5B
        ("INTERNVL3_2B", 28),  # LLM: Qwen2.5 1.5B
        ("INTERNVL3_8B", 28),  # LLM: Qwen2.5 7B
        ("INTERNVL3_14B", 48),  # LLM: Qwen2.5 14B
        ("INTERNVL3_38B", 64),  # LLM: Qwen2.5 32B
        # NOTE(review): the original note said "Qwen2.5 78B"; this is probably
        # the 72B LLM -- confirm against the model card.
        ("INTERNVL3_78B", 80),
    )
}
# DoLa layer selection; values listed by the original author: "high", "all".
DOLA_LAYER = "high"

# VCD settings.
VCD_NOISE_STEP = 500
VCD_ALPHA = 0.5
VCD_BETA = 0.1
