import editdistance
import os
import json
import pandas as pd
import argparse
from pathlib import Path
import yaml
import http.client
from neuspell import BertChecker
from spellchecker import SpellChecker
import editdistance
import numpy as np
import time

class BaseCorrector():
    """Dictionary corrector that maps each word to its closest corpus entry.

    Closeness is the edit distance computed by ``editdistance.eval``. When
    several corpus words are equally close, one of them is picked at random
    with ``np.random.choice``, so results are only reproducible if NumPy's
    global random state is seeded.
    """

    def __init__(self, corpus_textlist):
        """Store the reference vocabulary.

        Args:
            corpus_textlist: Indexable sequence of known-good words (strings).
                The exact-match lookup set is a snapshot taken here, so the
                sequence should not be mutated after construction.
        """
        self.corpus_textlist = corpus_textlist
        # Exact matches are by definition at distance 0 from themselves, so a
        # set lookup lets correct() skip the full corpus scan for them.
        self._known_words = frozenset(corpus_textlist)

    def correct(self, data):
        """Return ``data`` with every space-separated word corrected.

        Words already present in the corpus, and empty tokens produced by
        leading, trailing or repeated spaces, are kept as they are; previously
        an empty token was replaced by a random shortest corpus word. If the
        corpus is empty there is nothing to correct against, so the input is
        returned unchanged instead of failing inside ``np.min``.

        Args:
            data: Text whose words are separated by single spaces.

        Returns:
            The corrected text, with the same number of space-separated tokens.
        """
        corpus = self.corpus_textlist
        if len(corpus) == 0:
            return data

        res = []
        for subword in data.split(" "):
            if not subword or subword in self._known_words:
                res.append(subword)
                continue
            dist = np.array([editdistance.eval(subword, word) for word in corpus])
            # Indices of every corpus word tied for the smallest distance.
            nearest_idx = np.where(dist == dist.min())[0]
            # Draw an index rather than a word so the corpus never has to be
            # converted to an array; the draw is uniform over the same ties.
            res.append(corpus[np.random.choice(nearest_idx)])
        return " ".join(res)

class PySpellChekerCorrector():
    """Word-by-word corrector backed by ``spellchecker.SpellChecker``."""

    def __init__(self):
        """Create the underlying ``SpellChecker`` with its default settings."""
        self.spell = SpellChecker()

    def correct(self, data):
        """Return ``data`` with each space-separated word corrected.

        A word is left untouched whenever ``SpellChecker.correction`` gives
        back a falsy value (e.g. ``None``) for it.

        Args:
            data: Text whose words are separated by single spaces.

        Returns:
            The corrected words joined back together with single spaces.
        """
        fixed_tokens = [
            self.spell.correction(token) or token
            for token in data.split(" ")
        ]
        return " ".join(fixed_tokens)

class NeuralCorrector():
    """Corrector that delegates whole strings to neuspell's ``BertChecker``."""

    def __init__(self):
        """Build the checker and call ``from_pretrained()`` with no arguments.

        NOTE(review): presumably this loads neuspell's default checkpoint and
        may need its model files to be available - confirm.
        """
        self.checker = BertChecker()
        self.checker.from_pretrained()

    def correct(self, data):
        """Return whatever ``BertChecker.correct`` produces for ``data``.

        Args:
            data: The text to correct; it is passed through unsplit.
        """
        corrected = self.checker.correct(data)
        return corrected


class GPTCorrector():
    """Corrector that asks a chat-completions HTTP endpoint for the spelling."""

    def __init__(self, api_key="", host="api.openai-proxy.com", timeout=60, retry_delay=3):
        """Configure the endpoint; the defaults match the original constants.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header. The
                default is empty, so a real token must be supplied for the
                request to succeed.
            host: HTTPS host serving ``/v1/chat/completions`` (for example
                ``"api.openai.com"`` instead of the default proxy).
            timeout: Socket timeout in seconds for each attempt, so a stalled
                connection is retried instead of blocking forever.
            retry_delay: Seconds to wait between failed attempts.
        """
        self.api_key = api_key
        self.host = host
        self.timeout = timeout
        self.retry_delay = retry_delay

    def correct(self, data):
        """Return the model's answer for the correct spelling of ``data``.

        The request is retried indefinitely until a response containing
        ``choices[0].message.content`` is received. Each failure is logged as
        a warning so that a permanent problem (such as a missing token) is
        visible rather than appearing as a silent hang.

        Args:
            data: The possibly misspelled action name, embedded in the prompt.

        Returns:
            The ``content`` string of the first choice in the response.
        """
        # Function-scope import: logging is not among the file-level imports.
        import logging

        logger = logging.getLogger(__name__)

        # The request body and headers do not change between attempts.
        payload = json.dumps({
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "system",
                    "content": 'The following words may contain spelling errors by deleting, inserting and substituting letters. You are a corrector of spelling errors. Give only the answer without explication.'
                },
                {
                    "role": "user",
                    "content": f'What is the correct spelling of the action of "{data}"?'
                }
            ],
        })
        headers = {
            'Accept': 'application/json',
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        while True:  # execute until a meaningful result is returned
            conn = http.client.HTTPSConnection(self.host, timeout=self.timeout)
            try:
                conn.request("POST", "/v1/chat/completions", payload, headers)
                res = conn.getresponse().read().decode("utf-8")
                return json.loads(res)['choices'][0]['message']['content']
            except Exception as err:
                # Exception (not a bare except) keeps Ctrl-C and SystemExit
                # able to break out of the retry loop.
                logger.warning(
                    "Spelling request for %r failed (%r); retrying in %s s",
                    data, err, self.retry_delay,
                )
            finally:
                # Release the socket on success and on failure alike.
                conn.close()
            time.sleep(self.retry_delay)


def main():
    """Correct the spelling of the ``name`` column of a labels CSV in place.

    The CSV is ``<working_dir>/<name>_labels_<idx_sim>.csv``, where
    ``working_dir`` is ``./exp/<network.type>/<network.arch>/<data.dataset>/<log_time>``
    built from the YAML config and the command-line arguments. The corrected
    table is written back to the same path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-cfg', type=str, default='./configs/ucf101/ucf_zero_shot.yaml') # './configs/ucf101/ucf_zero_shot.yaml' './configs/hmdb51/hmdb_zero_shot.yaml' './configs/k700/k700_zero_shot.yaml'
    parser.add_argument('--log_time', type=str, default='20240105_172223')
    parser.add_argument('--name', type=str, default='ucf') # 'kinetics_700' 'hmdb51' 'ucf'
    parser.add_argument('--idx_sim', type=int, default=1)
    args = parser.parse_args()

    # NOTE(review): yaml.full_load can build more than plain YAML types; if the
    # configs only hold scalars, lists and mappings, yaml.safe_load would be
    # the safer choice - confirm before switching.
    with open(args.config, 'r') as f:
        config = yaml.full_load(f)
    working_dir = os.path.join('./exp', config['network']['type'], config['network']['arch'], config['data']['dataset'],
                               args.log_time)

    # The top-level keys of en.json are the vocabulary; the context manager
    # closes the file instead of leaving the handle to the garbage collector.
    with open("./en.json") as f:
        vocabulary = list(json.load(f))

    corrector = BaseCorrector(vocabulary)
    # corrector = PySpellChekerCorrector()
    # corrector = NeuralCorrector()
    # corrector = GPTCorrector()

    labels_path = os.path.join(working_dir, f"{args.name}_labels_{args.idx_sim}.csv")
    df = pd.read_csv(labels_path, index_col=0)
    # Item access targets the column unambiguously; attribute-style assignment
    # only works when the column already exists.
    df["name"] = df["name"].apply(corrector.correct)
    df.to_csv(labels_path)

# Run the correction pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
