import csv 
from pathlib import Path 
import fire 

from common .common_types import Sample 
from common .dataset import create_datasets 


def load_cruxeval_oneline(csv_path: str) -> list[Sample]:
    """
    Read / parse cruxeval_oneline.csv and build a list[Sample].

    The CSV must have columns: code, input, output, id (extra columns are
    ignored). Every field is stripped of surrounding whitespace. Rows whose
    code or id is empty or absent are skipped. The function name stored on
    each sample is always 'f'.

    Args:
        csv_path: Path to the CSV file (UTF-8, with or without a BOM).

    Returns:
        One Sample per usable row, in file order.

    Raises:
        FileNotFoundError: If csv_path does not exist.
        ValueError: If a required column is missing from the header row.
    """
    path = Path(csv_path)
    if not path.exists():
        raise FileNotFoundError(f"CSV not found: {path}")

    required = {"code", "input", "output", "id"}
    samples: list[Sample] = []
    # utf-8-sig decodes plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise be glued onto the first header name ("\ufeffcode")
    # and trip the required-column check below.
    with path.open("r", newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        missing = required - set(reader.fieldnames or [])
        if missing:
            raise ValueError(f"CSV is missing required columns: {sorted(missing)}")

        for row in reader:
            code = (row.get("code") or "").strip()
            inp = (row.get("input") or "").strip()
            out = (row.get("output") or "").strip()
            # DictReader fills fields absent from a short row with None.
            # Coerce via `or ""` rather than str(), since str(None) is the
            # truthy string "None" and would defeat the emptiness check.
            sid = (row.get("id") or "").strip()

            if not code or not sid:
                continue

            samples.append(
                Sample(
                    sample_id=sid,
                    code=code,
                    input=inp,
                    output=out,
                    function_name="f",
                )
            )
    return samples


def main(
    csv_path: str = "/path/to/home/lltm/07_pytracify_codeexec_dataset/datasets/cruxeval_oneline.csv",
    data_name: str = "cruxeval_oneline",
    output_dir: str = "/path/to/home/lltm/02_codeexec_etcot/scripts/instruction/convert_datasets",
    train_ratio: float = 0.0,
):
    """
    Convert the cruxeval_oneline CSV into dataset files.

    Loads the samples with `load_cruxeval_oneline` and hands them straight to
    `create_datasets`; splitting and file writing are left to that helper.

    Args:
        csv_path: Location of cruxeval_oneline.csv (columns: code,input,output,id).
        data_name: Dataset name passed to `create_datasets` as its first argument.
        output_dir: Output directory forwarded to `create_datasets`.
        train_ratio: Train split ratio forwarded to `create_datasets`
            (how 0.0 is treated is decided by that function).
    """
    create_datasets(
        data_name,
        load_cruxeval_oneline(csv_path),
        train_ratio=train_ratio,
        output_dir=output_dir,
    )


# Script entry point: only when run directly (not on import), hand `main` to
# python-fire so it can be invoked from the command line.
if __name__ =="__main__":
    fire .Fire (main )
