def get_description_prompt(args, data):
    """Assemble the prompt asking for a structured description of a diagram.

    The prompt is laid out as: the ``botext`` marker, a system turn stating
    the assistant's role and task, a user turn (image marker, optional
    Wikipedia background, then the Content / Layout / Function output
    instructions), and finally the ``boasst`` marker.

    Args:
        args: Run configuration. ``args.use_wiki_text`` is read here, and the
            object is forwarded to ``get_special_tokens``.
        data: Sample record. Only passed on to ``wiki_text_info``, and only
            when ``args.use_wiki_text == "wiki"``.

    Returns:
        The full prompt as a single string.
    """
    tokens = get_special_tokens(args)

    # Segments are collected in output order and joined once at the end.
    segments = [
        tokens["botext"],
        # --- system turn ---
        tokens["bosys"],
        'You are a diagram description assistant. The term "diagram" here refers to any complicated image, ',
        'including charts, infographics, illustrations, academic diagrams, and other visually complex representations. \n',
        'Your task is to provide a detailed and structured description of the given diagram. ',
        'Focus on aspects that might help to tag its domain (e.g., Biology, Chemistry, History) and type (e.g., Bar Chart, Flow Chart, Map). ',
        'Avoid subjective interpretations or conclusions unrelated to the tagging task. \n',
        tokens["eot"],
        '\n',
        # --- user turn ---
        tokens["bouser"],
        tokens["img"],
    ]

    # Wikipedia background is opt-in and sits between the image marker and
    # the output instructions.
    if args.use_wiki_text == "wiki":
        segments.append(wiki_text_info(data))

    segments.extend([
        'Your description should be organized into the following sections: \n',
        '1. Content: Describe the key visual elements, labels, and any prominent features of the diagram. \n',
        '2. Layout: Explain how the elements are arranged (e.g., hierarchical, circular, linear) and the overall structure of the diagram. \n',
        '3. Function: Indicate the likely purpose of the diagram (e.g., explaining a process, showing relationships, presenting data). \n',
        'Output your description as structured paragraphs under these three headings. ',
        'If any aspect cannot be reasonably described, use "NA" for that section. \n',
        tokens["eot"],
        '\n',
        tokens["boasst"],
    ])

    return ''.join(segments)


def get_tagging_prompt(args, data):
    """Assemble the prompt asking for a diagram's domain and type as JSON.

    Two variants exist, selected by ``args.use_option``:

    * ``"no-option"``: the model names the domain and type freely, guided
      only by a few examples.
    * anything else: the model must pick one entry from a fixed list of
      domains and one from a fixed list of types.

    Both variants share the same system turn, image marker, optional
    description section and closing instruction; only the middle of the
    instruction text differs.

    Args:
        args: Run configuration. ``args.use_option`` and ``args.use_desc``
            are read here, and the object is forwarded to
            ``get_special_tokens``.
        data: Sample record. ``data["model_description"]`` is read only when
            ``args.use_desc == "desc"``.

    Returns:
        The full prompt as a single string.
    """
    tokens = get_special_tokens(args)
    free_form = args.use_option == "no-option"

    # Shared opening: system turn, then the start of the user turn.
    segments = [
        tokens["botext"],
        tokens["bosys"],
        'You are a diagram tagging assistant. Your task is to analyze a diagram and identify its domain and type. \n',
        tokens["eot"],
        '\n',
        tokens["bouser"],
        tokens["img"],
    ]

    # Optional: include a previously generated description as reference.
    if args.use_desc == "desc":
        segments.append(
            'The description of the diagram is provided for your reference: \n'
            f'{data["model_description"]}\n\n'
        )

    segments.extend([
        'Instruction:\n',
        'Now analyze the diagram and provide its domain and type in a structured JSON format. \n',
    ])

    if free_form:
        segments.extend([
            'The domain should be a specific field or area of knowledge. ',
            'Its examples include Biology, Chemistry, Physics, Astronomy, History, etc. \n',
            'The type should describe the nature of the diagram. ',
            'Its examples include Bar Chart, Flow Chart, Table, Map, Logo, etc. \n',
            'Your output must be in the following JSON-like format: \n',
            '{',
            '  "Domain": "string (must be 1 or 2 words)",',
            '  "Type": "string (must be 1 or 2 words)"',
            '}\n',
        ])
    else:
        domain_choices = (
            "Agriculture", "Astronomy", "Biology", "Chemistry",
            "Computer Science", "Data Science", "Environmental Science",
            "Finance", "Geography and Geology", "Health Science", "History",
            "Mathematics", "Music", "Network Science", "Operations Research",
            "Physics", "Political Science", "Psychology", "Sports",
            "Transportation", "Urban Planning",
        )
        type_choices = (
            "Bar Chart", "Chemical Visual", "Concept Diagram", "Floor Plan",
            "Flow Chart", "Line Graph", "Logo", "Map", "Network Chart",
            "Pie Chart", "Scatter Plot", "Table", "Technical Diagram",
            "Timeline", "Tree",
        )

        # One "- <name>" bullet per line, each line newline-terminated.
        domain_bullets = ''.join(f'- {choice}\n' for choice in domain_choices)
        type_bullets = ''.join(f'- {choice}\n' for choice in type_choices)

        segments.extend([
            'The domain should be a specific field or area of knowledge. Choose only one option from the following list: \n',
            domain_bullets,
            'The type should describe the nature of the diagram. Choose only one option from the following list: \n',
            type_bullets,
            'Your output must be in the following JSON-like format: \n',
            '{',
            '  "Domain": "Your domain choice",',
            '  "Type": "Your type choice"',
            '}\n',
        ])

    # Shared closing: final instruction, end of user turn, assistant marker.
    segments.extend([
        'Do not provide any explanations or additional context. Only output the JSON object. \n',
        tokens["eot"],
        '\n',
        tokens["boasst"],
    ])

    return ''.join(segments)


def get_special_tokens(args):
    """Return the prompt marker strings for the model named by ``args.lvlm_path``.

    Args:
        args: Run configuration; only ``args.lvlm_path`` is read.

    Returns:
        A new dict with the keys ``botext``, ``bosys``, ``bouser``,
        ``boasst``, ``eot`` and ``img``. When ``args.lvlm_path`` is exactly
        ``"meta-llama/Llama-3.2-11B-Vision"`` the values are that model's
        header / end-of-turn / image markers; for any other path every value
        is the empty string.
    """
    uses_llama_markers = args.lvlm_path == "meta-llama/Llama-3.2-11B-Vision"
    if not uses_llama_markers:
        # No markers defined for this model: every entry is empty, so
        # callers can concatenate them unconditionally.
        return dict.fromkeys(
            ("botext", "bosys", "bouser", "boasst", "eot", "img"), ''
        )

    return {
        "botext": '<|begin_of_text|>\n\n',
        "bosys": '<|start_header_id|>system<|end_header_id|>\n',
        "bouser": '<|start_header_id|>user<|end_header_id|>\n',
        "boasst": '<|start_header_id|>assistant<|end_header_id|>\n',
        "eot": '<|eot_id|>\n',
        "img": 'Diagram: <|image|>\n',
    }


def wiki_text_info(data, *, max_image_desc_chars=500):
    """Build the Wikipedia background-information section of a prompt.

    Missing pieces of metadata never raise: an absent or empty ``webpages``
    list, a missing key, or a value that is explicitly ``None`` all fall
    back to a placeholder text.

    Args:
        data: Mapping for one sample. The first entry of ``data['webpages']``
            supplies ``page_title`` and ``clean_page_description``;
            ``data['image_ref_desc']`` supplies the diagram description.
        max_image_desc_chars: Longest diagram description (in characters)
            that is included verbatim. Longer descriptions are replaced by
            ``'Not provided.'``. Defaults to 500.

    Returns:
        A string listing page title, page description and diagram
        description, followed by guidance to prefer what is visible in the
        diagram, and terminated by a blank line.
    """
    def _fallback(value, default):
        # An explicit None is treated the same as a missing key.
        return default if value is None else value

    # Tolerate a missing, None or empty 'webpages' list (and a None entry).
    webpages = data.get('webpages')
    page_data = (webpages[0] if webpages else None) or {}

    page_title = _fallback(page_data.get('page_title'), 'Unknown Title')
    page_desc = _fallback(page_data.get('clean_page_description'), 'Not provided.')
    image_ref_desc = _fallback(data.get('image_ref_desc'), 'Not provided.')

    # Over-long descriptions are dropped entirely rather than truncated.
    if len(image_ref_desc) > max_image_desc_chars:
        image_ref_desc = 'Not provided.'

    prompt = (
        'The diagram is sourced from Wikipedia, and here is some background information: \n'
        f'- Page Title: {page_title}\n'
        f'- Page Description: {page_desc}\n'
        f'- Diagram Description: {image_ref_desc}\n'
        'Use the Wikipedia information above only if the diagram alone does not provide enough clarity or context. '
        'Always give priority to the information directly visible in the diagram for your analysis. '
        '\n\n'
    )
    return prompt


