import argparse 
import os 
import sys 
import json 

from omegaconf import OmegaConf 


# Opt in to the Hugging Face Hub `hf_transfer` download backend. This is set
# before the `ltm` import below, presumably so that any Hub client pulled in by
# that import sees the flag when it is first loaded -- TODO confirm.
os .environ ["HF_HUB_ENABLE_HF_TRANSFER"]="1"

from ltm .utils import instantiate_from_config 

def get_parser():
    """Build the command-line parser for this export script.

    Recognised options:
        -b / --base: zero or more config file paths (empty list by default).
        --datadir:   directory the generated data is saved to.
        --n:         number of samples to generate (int, default 3000).

    Returns:
        argparse.ArgumentParser: the configured parser. Callers use
        ``parse_known_args`` so that extra ``key=value`` tokens can be
        collected separately.
    """
    base_help = (
        "paths to base configs. Loaded from left-to-right. "
        "Parameters can be overwritten or added with command-line options of the form `--key value`."
    )

    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        "-b",
        "--base",
        nargs="*",
        metavar="base_config.yaml",
        help=base_help,
        default=[],
    )
    cli_parser.add_argument(
        "--datadir",
        type=str,
        default="/path/to/home/lltm/data",
        help="save dir for the data",
    )
    cli_parser.add_argument(
        "--n",
        type=int,
        default=3000,
        help="number of samples to generate",
    )
    return cli_parser


def main():
    """Generate training samples for one task and dump them as JSONL.

    Steps:
      1. Parse the CLI; tokens the parser does not recognise are treated as
         OmegaConf dotlist overrides.
      2. Load every ``--base`` config and merge them in order, with the
         dotlist overrides merged last.
      3. Require the configured transform target to be
         ``ltm.data.transforms.PytracifyTransformForCR``, then switch it to the
         ``...PytracifyTransformForCRraw`` target with ``return_answer=True``.
      4. Set ``num_train`` to ``--n`` and ``num_val`` / ``num_test`` to 1.
      5. Instantiate the data module, run ``setup(stage='fit')`` and iterate
         the dataset behind its train dataloader.
      6. Write one JSON object per sample to ``<datadir>/<task_name>.jsonl``,
         where ``task_name`` is the last dotted component of
         ``data.params.crbench_task_config.target``. Each line has the form
         ``{"messages": [{"role": "assistant", "content": <sample["text"]>}]}``.

    Raises:
        ValueError: if the configured transform target is not the supported
            ``PytracifyTransformForCR`` transform.
    """
    sys.path.append(os.getcwd())
    args, overrides = get_parser().parse_known_args()

    # Merge order matters: base configs left-to-right, CLI overrides last.
    base_configs = [OmegaConf.load(path) for path in args.base]
    cli_config = OmegaConf.from_dotlist(overrides)
    config = OmegaConf.merge(*base_configs, cli_config)

    data_params = config.data.params
    transform_cfg = data_params.transform_config

    if transform_cfg.target != "ltm.data.transforms.PytracifyTransformForCR":
        raise ValueError(
            "This data transformation is not supported: {}".format(transform_cfg.target)
        )
    # Swap in the "raw" variant of the transform and ask it to return the answer.
    transform_cfg.target = "ltm.data.transforms.PytracifyTransformForCRraw"
    transform_cfg.params.return_answer = True

    # Only the training split is exported; val/test are kept at a single sample.
    data_params.num_train = args.n
    data_params.num_val = 1
    data_params.num_test = 1

    # The instantiated object must provide ``setup`` and ``train_dataloader``
    # (the original annotation referenced an un-imported ``L.LightningDataModule``).
    datamodule = instantiate_from_config(
        OmegaConf.to_container(config.data, resolve=True)
    )
    datamodule.setup(stage="fit")
    dataset = datamodule.train_dataloader().dataset

    task_name = data_params.crbench_task_config.target.split(".")[-1]

    # Create the output directory up front so a missing --datadir does not
    # fail at open() time.
    if args.datadir:
        os.makedirs(args.datadir, exist_ok=True)
    out_path = os.path.join(args.datadir, task_name + ".jsonl")

    with open(out_path, "w", encoding="utf-8") as f:
        for sample in dataset:
            message = {"messages": [{"role": "assistant", "content": sample["text"]}]}
            # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
            f.write(json.dumps(message, ensure_ascii=False) + "\n")

# Script entry point: run the export only when this file is executed directly,
# not when it is imported as a module.
if __name__ =="__main__":
    main ()
