import openai
import sys
import json
import pandas as pd
import time
from utils import translate_named, compute_bleu, read_named_examples

# Load the API credentials once at import time; main() reads keys['codex'].
with open('keys.json') as key_file:
    keys = json.load(key_file)

# RLang statement kinds this script can evaluate. The keys are the values
# accepted as the command-line argument; the values are their capitalized names.
RLANG_TYPE_MAP = dict(
    policy='Policy',
    effect='Effect',
)

def main(argv):
    """Translate evaluation phrases to RLang and score each one with BLEU.

    Validates the command line, reads the named examples for the requested
    statement type from ``data/``, translates up to ``MAX_EVALUATION`` English
    phrases from the evaluation CSV with ``translate_named``, scores every
    translation against its reference RLang with ``compute_bleu``, and writes
    the per-phrase results to ``evaluation_results_<type>.csv``.

    Args:
        argv: Command-line arguments in ``sys.argv`` form. ``argv[1]`` must be
            one of the keys of ``RLANG_TYPE_MAP``.

    Returns:
        None. When the arguments are invalid a usage message is printed and
        the function returns without evaluating anything.

    Raises:
        Exception: Whatever ``translate_named`` raises if a translation fails
            a second time after the single delayed retry.
    """
    valid_rlang = set(RLANG_TYPE_MAP)
    if len(argv) != 2:
        print('Invalid number of arguments')
        print(f'Expected input: `python evaluate.py <{valid_rlang}>`')
        print('i.e. `python evaluate.py policy`')
        return
    if argv[1] not in valid_rlang:
        print(f'Invalid argument "{argv[1]}". evaluate.py only evaluates the following RLang statements:', valid_rlang)
        print(f'{argv[1]} is not a valid RLang statement')
        return

    rlang_type = argv[1]
    environment = 'lunar_lander'  # change environment as needed

    examples_df = read_named_examples(
        f'data/{rlang_type}_english.txt',
        f'data/{rlang_type}_names.txt',
        f'data/{rlang_type}_rlang.txt',
    )

    # NOTE(review): translate_named presumably talks to the OpenAI API through
    # the module-level key set here -- confirm against utils.
    openai.api_key = keys['codex']

    eval_df = pd.read_csv(f'generate/data/{environment}_nl_rlang_{rlang_type}.csv')

    MAX_EVALUATION = 10  # update if you want to evaluate more
    # Never index past the end of the evaluation set when it is smaller than
    # the requested number of phrases.
    num_to_evaluate = min(MAX_EVALUATION, len(eval_df))

    english_eval = eval_df['english'].values[:num_to_evaluate]
    rlang_eval = eval_df['rlang'].values[:num_to_evaluate]
    vocab_eval = eval_df['vocab'].values[:num_to_evaluate]
    bleu_scores = []
    translations = []

    print(f'translating and evaluating {num_to_evaluate} phrases...')
    for english, reference, vocab in zip(english_eval, rlang_eval, vocab_eval):
        try:
            translation = translate_named(english, examples_df, vocab)[1]
        except Exception as err:
            # Back off once and retry. ``Exception`` (not a bare ``except``)
            # keeps Ctrl-C and SystemExit from being swallowed by the retry.
            print(f'translation failed ({err!r}); retrying in 10 seconds...')
            time.sleep(10)
            translation = translate_named(english, examples_df, vocab)[1]
        bleu_score = compute_bleu(translation, reference)
        translations.append(translation)
        bleu_scores.append(bleu_score)
        print('translation', translation)
        print('actual', reference)
        print('bleu_score', bleu_score)

    output_csv = f'evaluation_results_{rlang_type}.csv'
    df = pd.DataFrame({
        "original_english": english_eval,
        "vocab": vocab_eval,
        "rlang": rlang_eval,
        "translation": translations,
        "bleu_score": bleu_scores,
    })
    df.to_csv(output_csv, index=False)
    print(f'DONE! final results in {output_csv}')

# Script entry point: pass the raw command line (program name included) to main().
if __name__ == "__main__":
    cli_args = sys.argv
    main(cli_args)
