from typing import List 
import os 

import fire 

from common .common_types import Sample 
from common .dataset import create_datasets 


def escape_quotes(s: str) -> str:
    """Return *s* rendered as a quoted Python string literal.

    Args:
        s: Raw text to embed in generated source code.

    Returns:
        ``repr(s)``: the text wrapped in quotes, with embedded quotes,
        backslashes and non-printable characters escaped so that the
        literal evaluates back to *s*.
    """
    # ``repr`` alone picks the quote style and escapes whatever needs it;
    # replacing a quote character with itself beforehand changes nothing.
    return repr(s)


def _load_len_file(path: str) -> List[Sample]:
    """Build one ``len()`` sample per line of the UTF-8 text file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        # The list is built inside the ``with`` block so every line is read
        # before the file is closed.
        return [
            Sample(
                sample_id=str(i),
                code=f"print(len({escape_quotes(token)}))",
                input=token,
                # Expected output is the length of the stripped token.
                output=str(len(token)),
                function_name="",
            )
            for i, token in enumerate(line.strip() for line in f)
        ]


def load_len(data_dir: str) -> List[Sample]:
    """Load the ``len()`` samples from both log files under *data_dir*.

    Each line of ``lltm-input-data/len_false.log`` and
    ``lltm-input-data/len_false2.log`` is stripped of surrounding
    whitespace and turned into a sample whose code prints the length of
    that token.

    Args:
        data_dir: Directory that contains the ``lltm-input-data`` folder.

    Returns:
        Samples from the first file followed by samples from the second.

    Raises:
        OSError: If either log file cannot be opened.
    """
    # NOTE(review): ``sample_id`` restarts at "0" for each file, so ids are
    # not unique across the combined list -- confirm that downstream
    # consumers do not rely on unique ids.
    samples: List[Sample] = []
    for relative_path in (
        "lltm-input-data/len_false.log",
        "lltm-input-data/len_false2.log",
    ):
        samples.extend(_load_len_file(os.path.join(data_dir, relative_path)))
    return samples


def main(
    data_name: str = "len-false",
    input_dir: str = "/path/to/home/data",
    output_dir: str = "/path/to/home/lltm/02_codeexec_etcot/scripts/instruction/convert_datasets",
):
    """Load the ``len()`` samples and pass them to ``create_datasets``.

    Args:
        data_name: Dataset name forwarded to ``create_datasets``.
        input_dir: Directory handed to ``load_len`` as its ``data_dir``.
        output_dir: Output location forwarded to ``create_datasets``.
    """
    create_datasets(
        data_name,
        load_len(data_dir=input_dir),
        output_dir=output_dir,
    )


# Script entry point: run only when executed directly, not when imported.
# ``main`` is handed to python-fire, which builds the command-line
# interface from its parameters.
if __name__ =="__main__":
    fire .Fire (main )
