import argparse
import csv
import itertools
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom

def prettify(elem):
    """Serialize *elem* and return it as line-broken XML text.

    The element is first serialized to UTF-8 bytes, re-parsed with minidom,
    and then emitted with one node per line and no indentation.
    """
    document = minidom.parseString(tostring(elem, encoding='utf-8'))
    return document.toprettyxml(indent="")

def remove_nul_characters(filename):
    """Write a copy of *filename* with NUL and carriage-return characters removed.

    The input is read as bytes and decoded as UTF-8. The sanitized text is
    written, UTF-8 encoded, to ``filename + '.tmp'``, and that path is
    returned. The original file is not modified.
    """
    with open(filename, 'rb') as source:
        text = source.read().decode('utf-8')

    # Drop both unwanted characters in a single pass over the text.
    sanitized = text.translate({ord('\0'): None, ord('\r'): None})

    temp_filename = filename + '.tmp'
    with open(temp_filename, 'w', encoding='utf-8') as target:
        target.write(sanitized)

    return temp_filename

def create_xml(dataset, csv_file_path, model, questions, temperature, max_item, fmt):
    """Build the lessons XML document from a ';'-separated questions CSV.

    The first field of each row becomes the ``<exercise>`` text; the remaining
    fields, re-joined with ';', become the ``<material>`` text. Blank rows and
    rows whose first field is empty are skipped, but they still consume a row
    index, so lesson ids keep matching row positions in the file.

    Args:
        dataset: Dataset name, interpolated into each lesson id.
        csv_file_path: Path of the CSV to read. A sanitized copy is written
            next to it by ``remove_nul_characters`` and is left on disk.
        model: Model name, interpolated into each lesson id.
        questions: Interpolated into each lesson id.
        temperature: Interpolated into each lesson id.
        max_item: Interpolated into each lesson id.
        fmt: ``"default"`` to use the context as material verbatim, or
            ``"cot"`` to append a step-by-step reasoning instruction to it.

    Returns:
        The XML document as a string: pretty-printed when possible, otherwise
        the plain ElementTree serialization.

    Raises:
        ValueError: If ``fmt`` is neither ``"cot"`` nor ``"default"``.
    """
    # Fail fast: previously an unknown fmt surfaced as an UnboundLocalError
    # on the first data row.
    if fmt not in ("cot", "default"):
        raise ValueError(f"Unsupported fmt {fmt!r}; expected 'cot' or 'default'")

    # Root element
    lessons = Element('lessons')

    cleaned_csv_file_path = remove_nul_characters(csv_file_path)
    print(cleaned_csv_file_path)

    id_prefix = f'{dataset}_{fmt}_{model}_{questions}_{temperature}_{max_item}_train_'

    # The sanitized copy is written as UTF-8, so read it back as UTF-8 rather
    # than with the locale's default encoding.
    with open(cleaned_csv_file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=';', quoting=csv.QUOTE_NONE)
        for i, row in enumerate(reader):
            # csv.reader yields [] for a blank line; treat it like an empty
            # exercise instead of failing on row[0].
            if not row or not row[0]:
                continue

            exercise = row[0]
            # QUOTE_NONE splits on every ';', so stitch the context back together.
            context = ';'.join(row[1:])

            lesson = SubElement(lessons, 'lesson', id=f'{id_prefix}{i}')

            # Material description
            material = SubElement(lesson, 'material')
            if fmt == "cot":
                material.text = (
                    f"{context}\n\n"
                    "Please answer the following question. Reason step by step.\n"
                )
            else:
                material.text = context

            ex_element = SubElement(lesson, 'exercise')
            ex_element.text = exercise

    # Return formatted XML, falling back to the raw serialization when the
    # pretty-printer cannot re-parse the document.
    try:
        return prettify(lessons)
    except Exception:
        print(f"Failure to prettify output for {csv_file_path}")
        return tostring(lessons, 'unicode')

def main(
        dataset: str = "nyt",
        temperature: float = 1.5,
        model: str = "llama3-8b-instruct",
        questions: int = 30,
        max_item: int = 1000,
        fmt: str = "default"
    ) -> None:
    """Convert one questions CSV into a lessons XML file.

    Reads ``questions/{dataset}/{model}/questions_{questions}_{temperature}_{max_item}.csv``
    and writes ``curriculum/lesson_{dataset}_{fmt}_{model}_{questions}_{temperature}_{max_item}.xml``.

    Args:
        dataset: Dataset name used in the input and output paths.
        temperature: Value used in the input and output file names.
        model: Model name used in the input and output paths.
        questions: Value used in the input and output file names.
        max_item: Value used in the input and output file names.
        fmt: Lesson format, either ``"default"`` or ``"cot"``.

    Raises:
        ValueError: If ``fmt`` is neither ``"cot"`` nor ``"default"``.
    """
    # Validate before doing any work: previously an unknown fmt left
    # output_path unbound and only failed after the CSV had been processed.
    if fmt not in ("cot", "default"):
        raise ValueError(f"Unsupported fmt {fmt!r}; expected 'cot' or 'default'")

    csv_file_path = f"questions/{dataset}/{model}/questions_{questions}_{temperature}_{max_item}.csv"
    output_path = f"curriculum/lesson_{dataset}_{fmt}_{model}_{questions}_{temperature}_{max_item}.xml"

    print(f"Processing {csv_file_path}")
    xml_output = create_xml(dataset, csv_file_path, model, questions, temperature, max_item, fmt)
    # The XML declaration names no encoding, which XML parsers read as UTF-8,
    # so write UTF-8 explicitly instead of the locale's default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(xml_output)
    print(f"XML written to {output_path}")

if __name__ == "__main__":
    # Imported here so that importing this module does not require jsonargparse.
    import jsonargparse

    # Hand main() to jsonargparse, which builds the command-line interface for it.
    jsonargparse.CLI(main)
