import re
import pandas as pd
from datasets import load_dataset, concatenate_datasets
import json
import os
import numpy as np
from typing import Dict, List, Any, Optional, Tuple

   
def main(name: str = "N4T200") -> None:
    """Fetch a Countdown validation set and save it as a Parquet file.

    Loads the ``train`` split of the dataset ``xxx98/Countdown-{name}-val``
    through ``datasets.load_dataset``, copies every sample unchanged into a
    pandas DataFrame, and writes it to ``data/countdown/val-{name}.parquet``
    (relative to the current working directory). The output directory is
    created if it does not exist. Progress is reported on stdout.

    Args:
        name: Variant identifier inserted into both the dataset repository
            name and the output file name. Defaults to ``"N4T200"``, the
            value that was previously hard-coded.
    """
    print("Loading Countdown dataset...")
    # The published validation data lives in the repository's "train" split.
    countdown_dataset = load_dataset(f"xxx98/Countdown-{name}-val", split="train")
    print(f"Loaded Countdown with {len(countdown_dataset)} samples")

    print("Processing Countdown samples...")
    # Samples are passed through unchanged; materialize them as a list of
    # records so pandas can build one row per sample.
    processed_test = list(countdown_dataset)

    print(f"Processed {len(processed_test)} Countdown samples")

    # Create output directory if it doesn't exist
    os.makedirs("data/countdown", exist_ok=True)

    # Convert the samples to a DataFrame and persist them without the index
    test_df = pd.DataFrame(processed_test)
    test_df.to_parquet(f"data/countdown/val-{name}.parquet", index=False)

    print("\nProcessing complete!")
    print(f"Test samples: {len(test_df)}")

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()