def get_description_prompt(args, data):
    """Assemble the prompt that asks the model to describe a diagram.

    The prompt is laid out as: begin-of-text token, a system turn (role
    definition plus the four aspects to cover), a user turn (image
    placeholder, optional Wikipedia background, required output format),
    and finally the assistant header so generation starts in that turn.

    Args:
        args: Options object. Passed to ``get_special_tokens`` and read for
            ``use_wiki_text``.
        data: Sample record. Only forwarded to ``wiki_text_info`` when
            ``args.use_wiki_text == "wiki"``.

    Returns:
        The fully concatenated prompt string.
    """
    tokens = get_special_tokens(args)

    # Wikipedia background is injected only when explicitly requested.
    if args.use_wiki_text == "wiki":
        wiki_block = wiki_text_info(data)
    else:
        wiki_block = ""

    pieces = [
        tokens["botext"],
        # ---- system turn ----
        tokens["bosys"],
        'You are a diagram description assistant. The term "diagram" here refers to any complicated image, ',
        'including charts, infographics, illustrations, academic diagrams, and other visually complex representations. \n',
        'Your task is to provide a detailed description of the diagram, addressing the following four aspects: \n',
        '- Recognition: Identify and describe the key visual elements present in the diagram; \n',
        '- Understanding: Explain the relationships and interactions between these elements; \n',
        '- Grounding: Relate the diagram elements to real-world concepts or entities; \n',
        '- Reasoning: Interpret the diagram to draw conclusions or infer information beyond what is explicitly shown. \n',
        'Please formalize these four aspects respectively. ',
        'Feel free to skip an aspect with "NA" if it is not reasonable to describe. \n',
        tokens["eot"],
        '\n',
        # ---- user turn ----
        tokens["bouser"],
        tokens["img"],
        f'{wiki_block}',
        'You must output your result in the following JSON-like format:\n',
        # The schema fragments carry no newlines, so they land on one line.
        '{',
        '  "Recognition": "string or NA",',
        '  "Understanding": "string or NA",',
        '  "Grounding": "string or NA",',
        '  "Reasoning": "string or NA"',
        '}\n',
        tokens["eot"],
        '\n',
        # ---- assistant header (left open for generation) ----
        tokens["boasst"],
    ]
    return "".join(pieces)


def get_annotation_prompt(args, data):
    """Assemble the prompt that asks the model to write four MCQ annotations.

    The prompt is laid out as: begin-of-text token, a system turn (role
    definition), a user turn (image placeholder, the previously generated
    description from ``data["model_description"]``, the annotation
    instructions and the required output format), and finally the assistant
    header so generation starts in that turn.

    Args:
        args: Options object passed to ``get_special_tokens``.
        data: Sample record; ``data["model_description"]`` is embedded in the
            user turn.

    Returns:
        The fully concatenated prompt string.
    """
    tokens = get_special_tokens(args)

    pieces = [
        tokens["botext"],
        # ---- system turn ----
        tokens["bosys"],
        'You are a question-answering annotation assistant. ',
        'Your task is to analyze a diagram and annotate question-answering pairs. \n',
        tokens["eot"],
        '\n',
        # ---- user turn ----
        tokens["bouser"],
        tokens["img"],
        'The description of the diagram is provided for your reference: \n',
        # Kept as an f-string so a non-str description is stringified.
        f'{data["model_description"]}\n\n',
        'Instruction:\n',
        'Create four multiple-choice questions based on the diagram, each focusing on one of the four aspects:\n ',
        'Question 1 (Recognition): Test the identification of elements in the diagram; ',
        'Question 2 (Understanding): Assess comprehension of the relationships or processes depicted; ',
        'Question 3 (Grounding): Evaluate the ability to connect diagram elements to real-world knowledge; ',
        'Question 4 (Reasoning): Challenge inference or prediction based on the diagram. \n',
        'For each question: \n',
        '- Provide a clear question statement. \n',
        '- Offer exactly four options labeled A, B, C, and D. \n',
        '- Indicate the correct answer, which must be one and only one among A, B, C, or D. \n',
        'You must output your result in the following JSON-like format:\n',
        # The schema fragments carry no newlines, so they land on one line.
        '{',
        '  "Recognition": {"Question": "string", "Options": {"A": "string", "B": "string", "C": "string", "D": "string"}, "Answer": "A/B/C/D"}, ',
        '  "Understanding": {"Question": "string", "Options": {"A": "string", "B": "string", "C": "string", "D": "string"}, "Answer": "A/B/C/D"}, ',
        '  "Grounding": {"Question": "string", "Options": {"A": "string", "B": "string", "C": "string", "D": "string"}, "Answer": "A/B/C/D"}, ',
        '  "Reasoning": {"Question": "string", "Options": {"A": "string", "B": "string", "C": "string", "D": "string"}, "Answer": "A/B/C/D"}',
        '}\n',
        'If an aspect is not reasonable to annotate, output: \n',
        '  "AspectName": "NA"\n',
        tokens["eot"],
        '\n',
        # ---- assistant header (left open for generation) ----
        tokens["boasst"],
    ]
    return "".join(pieces)


def get_special_tokens(args):
    """Return the chat-template markers to use for ``args.lvlm_path``.

    Args:
        args: Options object; only ``lvlm_path`` is read.

    Returns:
        A new dict with the keys ``botext``, ``bosys``, ``bouser``,
        ``boasst``, ``eot`` and ``img``. For
        ``"meta-llama/Llama-3.2-11B-Vision"`` the values are that model's
        header/end-of-turn/image markers; for any other path every value is
        the empty string.
    """
    # Any model other than the one handled below gets blank markers.
    if args.lvlm_path != "meta-llama/Llama-3.2-11B-Vision":
        return dict.fromkeys(
            ("botext", "bosys", "bouser", "boasst", "eot", "img"), ''
        )

    return {
        "botext": '<|begin_of_text|>\n\n',
        "bosys": '<|start_header_id|>system<|end_header_id|>\n',
        "bouser": '<|start_header_id|>user<|end_header_id|>\n',
        "boasst": '<|start_header_id|>assistant<|end_header_id|>\n',
        "eot": '<|eot_id|>\n',
        "img": 'Diagram: <|image|>\n',
    }


def _get_or_default(mapping, key, default):
    """Return ``mapping[key]``, or *default* when the key is absent or None."""
    value = mapping.get(key)
    return default if value is None else value


def wiki_text_info(data):
    """Build the Wikipedia background paragraph inserted into a prompt.

    Reads the first entry of ``data['webpages']`` for ``page_title`` and
    ``clean_page_description``, and ``data['image_ref_desc']`` for the
    diagram description. Any of these that is missing or ``None`` is
    replaced by a placeholder instead of raising or rendering ``"None"``.

    Args:
        data: Mapping for one sample.

    Returns:
        A multi-line string listing the page title, page description and
        diagram description, followed by guidance to prioritise what is
        visible in the diagram, and ending with a blank line.
    """
    # Tolerate a missing key, a None value, or an empty list of webpages.
    webpages = data.get('webpages') or [{}]
    page_data = webpages[0] or {}

    page_title = _get_or_default(page_data, 'page_title', 'Unknown Title')
    page_desc = _get_or_default(page_data, 'clean_page_description', 'Not provided.')

    image_ref_desc = _get_or_default(data, 'image_ref_desc', 'Not provided.')
    # Reference descriptions longer than 500 characters are dropped entirely
    # rather than truncated.
    if len(image_ref_desc) > 500:
        image_ref_desc = 'Not provided.'

    prompt = (
        'The diagram is sourced from Wikipedia, and here is some background information: \n'
        f'- Page Title: {page_title}\n'
        f'- Page Description: {page_desc}\n'
        f'- Diagram Description: {image_ref_desc}\n'
        'Use the Wikipedia information above only if the diagram alone does not provide enough clarity or context. '
        'Always give priority to the information directly visible in the diagram for your analysis. '
        '\n\n'
    )
    return prompt

