import json
import os

def _format_item(item):
    """Turn one raw record into a ``{"question", "answer"}`` dict.

    The choices are rendered as lettered lines ("A. ...", "B. ...") and
    appended to the question after a blank line. Letters are produced with
    ``chr(ord("A") + i)``, so they are only real letters for the first 26
    choices.
    """
    first_letter = ord("A")

    question = item.get("question", "")
    # ``or []`` also covers an explicit JSON ``null`` for "choices", which
    # would otherwise crash on ``len(None)``.
    choices = item.get("choices") or []
    # NOTE: a record without an "answer" key defaults to index 0, so it is
    # labelled "A" whenever it has at least one choice.
    answer_index = item.get("answer", 0)

    if isinstance(answer_index, int) and 0 <= answer_index < len(choices):
        answer_letter = chr(first_letter + answer_index)
    else:
        # Not a usable index (wrong type or out of range): keep the
        # original value, stringified.
        answer_letter = str(answer_index)

    if choices:
        choice_lines = "\n".join(
            f"{chr(first_letter + i)}. {choice}"
            for i, choice in enumerate(choices)
        )
        question = f"{question}\n\n{choice_lines}"
    # With no choices the question is left untouched instead of gaining a
    # dangling "\n\n" suffix.

    return {"question": question, "answer": answer_letter}


def preprocess_general_task_dataset(input_file, output_file):
    """
    Preprocess a multiple-choice dataset into question/answer pairs.

    Each input record's question and choices are merged into a single
    string, and an in-range integer answer index is mapped to its choice
    letter (e.g., 0 -> 'A'). Any other answer value is kept as ``str(value)``.

    Args:
        input_file: Path to a JSON file containing a list of records with
            "question", "choices" and "answer" keys (all optional).
        output_file: Path the processed JSON list is written to
            (UTF-8, non-ASCII preserved, 2-space indent).

    Returns:
        None. If ``input_file`` does not exist, an error is printed and no
        output file is written.

    Raises:
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
    """
    # Load the source dataset; opening directly avoids the race between an
    # existence check and the actual read.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {input_file} not found.")
        return

    processed_data = [_format_item(item) for item in data]

    # Export processed data to JSON
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(processed_data, f, ensure_ascii=False, indent=2)

    print(f"Success: Processed {len(processed_data)} items saved to {output_file}")

if __name__ == "__main__":
    # Script entry point: run the preprocessing on the example dataset.
    # Both paths are relative, so they resolve against the current working
    # directory.
    INPUT_PATH, OUTPUT_PATH = (
        "mmlu_redux2_dataset.json",
        "mmlu_redux2_processed.json",
    )

    preprocess_general_task_dataset(
        input_file=INPUT_PATH,
        output_file=OUTPUT_PATH,
    )