
import os

import sys

sys.path.insert(0,

                os.path.abspath(os.path.join(os.path.dirname(__file__),

                                             '')))

import argparse

import pandas as pd

import json

import numpy as np

from models import opensource, claude, gpt

from utils import textprocessing

from utils.clustering import clustering

from utils.clustering import lexical_diversity

from dataclasses import dataclass

import yaml

from tqdm import tqdm





@dataclass
class Arguments:
    """Bundle of run settings in which every field has a default.

    Instances are created by ``load_arguments_from_yaml``, which expands a
    YAML mapping into keyword arguments, so the mapping's keys must be field
    names declared here. Any field missing from the mapping keeps its default.
    """

    # String-valued settings; each defaults to the empty string.
    root: str = ''
    folder: str = ''
    split: str = ''
    model: str = ''
    prompt: str = ''
    template: str = ''

    # Numeric settings.
    # NOTE(review): presumably generation controls handed to the model --
    # confirm against the code that consumes an Arguments instance.
    temperature: float = 1.0
    max_length: int = 512
    num_return_sequences: int = 1



def load_arguments_from_yaml(yaml_file):
    """Build an ``Arguments`` instance from a YAML file.

    Args:
        yaml_file: Path of a YAML document whose top level is a mapping from
            ``Arguments`` field names to values.

    Returns:
        Arguments: Populated from the mapping. An empty document yields an
        instance with every field at its default.

    Raises:
        TypeError: If the top level of the document is not a mapping, or if
            it contains a key that is not an ``Arguments`` field.
    """
    # Open for reading as text; open() rejects an empty mode string.
    with open(yaml_file, 'r', encoding='utf-8') as file:
        # safe_load returns None for an empty document; treat that as
        # "no overrides" instead of failing on ``**None``.
        args_dict = yaml.safe_load(file) or {}

    if not isinstance(args_dict, dict):
        raise TypeError(
            f'expected a mapping at the top level of {yaml_file!r}, '
            f'got {type(args_dict).__name__}')

    return Arguments(**args_dict)





if __name__ == '':
    # Script body: generate programs for each dataset row, run them against
    # the row's test cases, compute coherence / accuracy / semantic-cluster /
    # lexical-diversity figures, and write per-row results plus summary stats.
    #
    # NOTE(review): every string literal in this block (the __name__ guard,
    # CLI flag names, dict/column keys, model-name matches, file modes, line
    # terminator) is empty in this revision. They are reproduced unchanged
    # because the intended values cannot be determined from this file alone;
    # restore them before relying on the script.

    # Command-line options: four string options (default ''), one float
    # (default 1.0) and two ints (defaults 512 and 1).
    parser = argparse.ArgumentParser()
    parser.add_argument('', type=str, default='')
    parser.add_argument('', type=str, default='')
    parser.add_argument('', type=str, default='')
    parser.add_argument('', type=str, default='')
    parser.add_argument('', type=float, default=1.0)
    parser.add_argument('', type=int, default=512)
    parser.add_argument('', type=int, default=1)
    args = parser.parse_args()

    # Choose the model wrapper from the model name; anything that matches
    # neither the GPT nor the Claude test falls through to the open-source
    # wrapper.
    if '' in args.model or '' in args.model:
        pipe = gpt.GPTModel(model_name=args.model)
    elif args.model in ['', '', '']:
        pipe = claude.ClaudeModel(model_name=args.model)
    else:
        pipe = opensource.OpensourceModel(model_name=args.model)

    # Load the dataset: a JSON-lines file under <root>/<folder>/.
    print(f'reading in data from {args.root}/{args.folder}/{args.split}_descriptions_and_testcases.jsonl')
    path = os.path.join(args.root, args.folder, f'{args.split}_descriptions_and_testcases.jsonl')
    df = pd.read_json(path, lines=True, orient='')

    # Build the Docker client/image that instrument_code_docker receives below.
    client, image = clustering.build_docker_image(clustering.clustering_abs_dir)

    # The results path is loop-invariant, so build it once. It is relative
    # to the current working directory.
    results_path = f'../collected/{args.model}_{args.folder}_{args.split}_diversity_results.jsonl'

    results = []
    count = 0  # number of results appended so far

    for index, row in tqdm(df.iterrows()):
        # Stop once the row's index label exceeds 100.
        if index > 100:
            break

        result = {}
        result[''] = args.model
        result[''] = index

        prompt = row['']
        prompt = prompt.replace('', '').replace('', '') + ''

        # NOTE(review): max_length is fixed at 1024 here rather than taken
        # from the command line -- confirm this is intended.
        generateds_program = pipe.generate(
            prompt, temperature=args.temperature,
            max_length=1024,
            do_sample=False, return_dict_in_generate=True, output_scores=True,
            repetition_penalty=20.0, num_return_sequences=args.num_return_sequences)

        # Entries may be None (presumably when no code could be extracted
        # from a generation -- confirm in textprocessing).
        programs = [textprocessing.extract_python_code(g) for g in generateds_program]

        # Skip the row entirely when no generation produced a program.
        if all(program is None for program in programs):
            continue

        result[''] = prompt
        result[''] = programs
        testcase_inputs = row['']
        result[''] = testcase_inputs
        testcase_outputs = row['']

        # One record per non-None program; always a list, possibly shorter
        # than `programs`.
        output_records = [clustering.instrument_code_docker(
            program,
            testcase_inputs,
            testcase_outputs,
            image,
            client,
            n_test_cases=-1,
            indiv_tc_timeout=20,
            verbose_docker=True) for program in programs if program is not None]
        result[''] = output_records

        coherence, n_outputs, n_coherent = clustering.report_coherence(output_records)
        result[''] = coherence
        result[''] = n_outputs
        result[''] = n_coherent

        # report_accuracy's result is indexed by program text; programs
        # that were None get an accuracy of 0.0 so the list stays aligned
        # with `programs`.
        accuracy = clustering.report_accuracy(output_records)
        accuracies = [
            accuracy[program] if program is not None else 0.0
            for program in programs
        ]
        result[''] = accuracies

        program_2_semantic_string, semantic_strings_2_programs = clustering.make_semantic_strings(output_records)
        semantic_count = len(semantic_strings_2_programs.keys())
        print('', semantic_count)
        result[''] = semantic_count
        result[''] = program_2_semantic_string
        result[''] = semantic_strings_2_programs

        # Lexical diversity is computed over the non-None programs only and
        # needs at least two of them; otherwise all four figures are 0.0.
        programs = [program for program in programs if program is not None]

        if len(programs) >= 2:
            distinct_1 = lexical_diversity.distinct_n(programs, 1, lexical_diversity.codebert_tokenizer)
            distinct_2 = lexical_diversity.distinct_n(programs, 2, lexical_diversity.codebert_tokenizer)
            distinct_3 = lexical_diversity.distinct_n(programs, 3, lexical_diversity.codebert_tokenizer)
            corpus_self_bleu = lexical_diversity.parallel_corpus_self_bleu(programs, lexical_diversity.codebert_tokenizer, n_jobs=-1, normalize=True)
            result[''] = distinct_1
            result[''] = distinct_2
            result[''] = distinct_3
            result[''] = corpus_self_bleu
        else:
            result[''] = 0.0
            result[''] = 0.0
            result[''] = 0.0
            result[''] = 0.0

        results.append(result)

        # Periodic checkpoint: rewrite the whole results file after the
        # 1st, 11th, 21st, ... appended result.
        if count % 10 == 0:
            with open(results_path, '') as f:
                for record in results:
                    f.write(json.dumps(record) + '')
        # Incremented for every appended result, outside the checkpoint
        # branch, so the modulo test above can become true again.
        count += 1

    # Final write of everything collected.
    with open(results_path, '') as f:
        for record in results:
            f.write(json.dumps(record) + '')

    # Summary statistics over the selected result columns.
    df_results = pd.DataFrame(results)
    df_results_stats = df_results[['', '', '', '', '', '', '', '']]
    described = df_results_stats.describe()
    print(described)

    described.to_csv(f'../collected/{args.model}_{args.folder}_{args.split}_results_stats.csv')

    print('')

