import math
import pandas as pd
import numpy as np
from importance_sampling import Importance_Sampling
from utils.constant import PROBLEM_FEATURES
from tensorflow.keras.models import load_model
import tensorflow as tf
import pickle
import csv



def main(raw_data, nn_model, policy, *, gamma=None):
    """Estimate the return of ``policy`` with weighted importance sampling.

    Builds a working frame from ``raw_data``, attaches the network's three
    per-action outputs, hands the frame to ``Importance_Sampling`` and prints
    one CSV-style line: ``<policy>,<estimator>,<value>``.

    Args:
        raw_data: DataFrame holding the ``PROBLEM_FEATURES`` columns plus
            'userID', 'action', 'problem', 'critical', 'cluster', 'prob_ps',
            'prob_fwe', 'prob_we' and 'hope_reward'.
        nn_model: Model exposing ``predict``; its output is indexed as a 2-D
            array with at least three columns (stored as 'ps', 'fwe', 'we').
        policy: Policy identifier forwarded to ``Importance_Sampling`` and
            echoed in the printed line.
        gamma: Optional discount factor. When omitted, ``config['gamma']`` is
            looked up at module level.

    Raises:
        NameError: If ``gamma`` is omitted and no module-level ``config``
            exists.
    """
    if gamma is None:
        # NOTE(review): `config` is neither defined nor imported in the part
        # of the file visible here; fail with an actionable message instead
        # of a bare "name 'config' is not defined".
        try:
            gamma = config['gamma']
        except NameError:
            raise NameError(
                "main() needs a discount factor: pass gamma=... or define a "
                "module-level `config` containing a 'gamma' entry"
            ) from None

    # Derive the network input from the frame we were given instead of
    # relying on a global that only exists when the file runs as a script.
    student_observations = raw_data[PROBLEM_FEATURES].values
    predicted = nn_model.predict(student_observations)

    # Explicit copy: the column assignments below must not write into (or
    # warn about writing into) a slice of the caller's frame.
    df = raw_data[['userID', 'action', 'problem', 'critical', 'cluster',
                   'prob_ps', 'prob_fwe', 'prob_we', 'hope_reward']].copy()

    # Encode the logged action: 'problem' -> 0, 'example' -> 2, else 1.
    df['real_action'] = np.select(
        [df['action'] == 'problem', df['action'] == 'example'],
        [0, 2],
        default=1,
    )
    df['ps'] = predicted[:, 0]
    df['fwe'] = predicted[:, 1]
    df['we'] = predicted[:, 2]

    # NOTE(review): the meaning of the 0.1 argument is defined by
    # Importance_Sampling, which is not visible here.
    policy_est = Importance_Sampling(df, 0.1, gamma, policy)
    policy_est.readData()
    value = policy_est.WIS()

    # Label of the estimator that produced `value` (see the WIS() call above).
    ope = 'WIS'
    print("{},{},{}".format(policy, ope, value))

if __name__ == "__main__":
    # Script entry point: load the dataset, attach cluster labels, load the
    # trained network, then print an OPE estimate for each candidate policy.
    # Indentation is four spaces throughout; mixing tabs and spaces inside
    # this suite makes Python 3 reject the file at parse time.

    # load data
    file_name = 'features_all_prob_action_immediate_reward'
    data_path = '../data/augmented_dataset/{}.csv'.format(file_name)
    raw_data = pd.read_csv(data_path)
    feature_list = PROBLEM_FEATURES
    # Feature matrix for the network, bound at module scope.
    student_observations = raw_data[feature_list].values
    print('finish loading data')

    # preprocess data for TICC clustering
    # NOTE(review): `ticc_cluster` is not defined or imported in the part of
    # the file visible here; it must be brought into scope for this to run.
    ticc_data = raw_data[['userID', 'time'] + feature_list]
    clusters = ticc_cluster(ticc_data)
    raw_data['cluster'] = clusters

    # load trained model for Q values & DQN policies
    nn_str = 'model/problem.h5'
    nn_model = load_model(nn_str)

    # OPE estimate return of each policy
    policy_list = ['DQN1', 'DQN2', 'DQN3', 'Expert']
    for policy in policy_list:
        main(raw_data, nn_model, policy)

