import sys
import pandas as pd
import numpy as np
import os
import pickle
import warnings
import json
from utils import get_all_embedding_kernels, get_optimal_actions, run_experiment, get_similarity_matrix, get_starting_data
from kernels import L1Norm
from models.svr import SVM
from models.kr import KR
from models.gp import GP

# Suppress FutureWarning messages for the whole run.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Define parameters for this run
MIN_REWARD = 0
MAX_REWARD = 100
NUM_POSSIBLE_ACTIONS = 50

# Expected command line: <experiment_name> <human_value> <n_runs>
# Fail early with a readable message instead of an opaque IndexError.
if len(sys.argv) < 4:
    sys.exit("usage: " + sys.argv[0] + " <experiment_name> <human_value> <n_runs>")

EXPERIMENT_NAME = sys.argv[1]  # pick informative name; output will be written to this folder
human_value = sys.argv[2]
VALUE = human_value + '_score'  # name of the value column to use for the value function
try:
    N_RUNS = int(sys.argv[3])
except ValueError:
    sys.exit("n_runs must be an integer, got " + repr(sys.argv[3]))
# NOTE(review): kernel_fn is not referenced by the code visible in this script.
kernel_fn = L1Norm

# Pre-computed kernels from the embedding models, loaded once up front
kernels = get_all_embedding_kernels("./models/embedding_kernels/")

# Embedding model names (the keys of the kernel mapping), kept for convenience
model_names = list(kernels.keys())

# Metrics to track; also used as the column order of the saved metric tables
metrics = [
    'mean_reward',
    'unique_actions_taken',
    'non_optimal_actions_taken',
    'negative_actions_taken',
    'iterations_to_convergence',
]

def _first_line_as_ints(path):
    """Return the first line of *path* parsed as a ", "-separated list of ints."""
    with open(path, 'r') as handle:
        first_line = handle.readline()
    return [int(token) for token in first_line.split(", ")]


# Read in the train/test actions.
train_actions = _first_line_as_ints('train.txt')
test_actions = _first_line_as_ints('test.txt')

# Output root for this run: ./data/<EXPERIMENT_NAME>/<VALUE>.
# A single makedirs(exist_ok=True) creates every missing level, copes with an
# EXPERIMENT_NAME that itself contains sub-folders, and avoids the
# check-then-create race when several jobs start at the same time.
base_path = './data/' + EXPERIMENT_NAME + '/' + VALUE
os.makedirs(base_path, exist_ok=True)

# Ground-truth morality & action data is loaded a single time here and shared by all models.
data = pd.read_csv('./data/justice_50_actions_with_values.csv')

# Action ids are simply 0 .. NUM_POSSIBLE_ACTIONS - 1
actions = np.arange(NUM_POSSIBLE_ACTIONS, dtype=int)

descriptions = list(data['action_label'])
real_morality_scores = list(data[VALUE])
real_optimal_actions = get_optimal_actions(NUM_POSSIBLE_ACTIONS, actions, real_morality_scores)

def _append_run_metrics(out_dir, single_run_metrics):
    """Append one run's metrics as a new row of ``<out_dir>run_data.csv``.

    Args:
        out_dir: Output directory path, including the trailing "/".
        single_run_metrics: Mapping of metric name to value, as returned by
            ``run_experiment``.

    The row is restricted/ordered to the module-level ``metrics`` columns.
    If the CSV already exists its rows are kept and the new row is added
    after them; otherwise the file is created.
    """
    # Wrap each value in a list so the mapping parses to a single DataFrame row
    row = pd.DataFrame({name: [value] for name, value in single_run_metrics.items()},
                       columns=metrics)
    csv_path = out_dir + 'run_data.csv'
    if os.path.exists(csv_path):
        row = pd.concat([pd.read_csv(csv_path), row])
    row.to_csv(csv_path, index=False)


def _run_phase(model, run, out_dir, allowed_actions, starting_actions, starting_scores, test):
    """Run one experiment phase for ``run`` and persist its metrics.

    Args:
        model: Model instance handed to ``run_experiment``.
        run: Index of the current run; used in the history file name.
        out_dir: Output directory path, including the trailing "/".
        allowed_actions: File name passed through as ``allowed_actions``.
        starting_actions: List passed through to ``run_experiment``.
        starting_scores: List passed through to ``run_experiment``.
        test: Passed through as ``test`` (False for training, True for testing).
    """
    # exist_ok avoids the check-then-create race between concurrent jobs
    os.makedirs(out_dir, exist_ok=True)
    single_run_metrics = run_experiment(actions, starting_actions, starting_scores,
                                        real_morality_scores, real_optimal_actions,
                                        model, n_allowed_actions=10, immoral_threshold=50,
                                        max_iterations=1000,
                                        allowed_actions=allowed_actions,
                                        history_filename=out_dir + "run_" + str(run) + ".csv",
                                        test=test)
    _append_run_metrics(out_dir, single_run_metrics)


# Run one big loop for each embedding based model.
for embedding_model in model_names:
    # Run each of the kernel based models a few times using the embedding based models' kernels.
    # NOTE(review): a single KR instance is shared by all N_RUNS runs of this
    # embedding model. If run_experiment updates the model in place, later
    # runs start from an already-trained model -- confirm this is intended.
    model = KR(actions, kernels[embedding_model])

    for run in range(N_RUNS):
        # The same two list objects are handed to both phases of this run.
        # NOTE(review): if run_experiment appends to them, the test phase
        # sees the training data -- confirm against run_experiment.
        starting_actions, starting_scores = [], []

        # Phase 1: Training
        # Track metrics for the training process. Update models at each timestep.
        _run_phase(model, run, base_path + "/" + embedding_model + "/kr/train/",
                   'allowed_actions_1k_train.txt', starting_actions, starting_scores,
                   test=False)

        # Phase 2: Testing
        # The model is no longer being updated on new data. It is simply evaluated on metrics over
        # the fixed 1k trials.
        _run_phase(model, run, base_path + "/" + embedding_model + "/kr/test/",
                   'allowed_actions_1k_test.txt', starting_actions, starting_scores,
                   test=True)

        
