import numpy as np
import tensorflow as tf
print(tf.__version__)
import random as rn


import itertools
import pandas as pd

from data_engineering.analyze_data import analyze
from data_engineering.generate_synthetic_dataset import generate_brownian_asset_dynamics
from models.dos_model import train_dos_model
from models.rrlsm_model import train_rrlsm_model
from models.fqi_model import train_fqi_model
from models.ospg_model import train_ospg_model

# REPRODUCIBILITY
# Pin every seedable source of randomness used by this script to one value.
import os
import time

SEED = 42

# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts,
# so assigning it here most likely only reaches child processes -- confirm
# whether it should be exported by the launcher instead.
# CUDA_VISIBLE_DEVICES='0' limits CUDA-based libraries to device index 0.
os.environ.update({
    'PYTHONHASHSEED': '0',
    'CUDA_VISIBLE_DEVICES': '0',
})

# The three generators are independent of each other, so the order of these
# calls does not matter.
rn.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)


# Passed to analyze() as its train and validation fractions.
TRAIN_FRACTION = 0.5
VAL_FRACTION = 0.2

# Where main() writes the results table. os.path.join keeps the path valid
# whether or not RESULTS_DIR ends with a path separator.
RESULTS_DIR = './'
EXPERIMENT_FNAME = os.path.join(RESULTS_DIR, 'bermudan_max_call_experiment.csv')

# Number of folds passed to analyze(); main() records one row per fold for
# each algorithm.
FOLDS = 10

# Experiment grid: main() runs every combination of the three lists below.
NUM_ASSETS = [20, 50, 100, 200]
CURRENT_PRICE = [90, 100, 110]
NUM_TRAJECTORIES = [40000]


def _build_model_configs(option_parameters):
    """Build fresh hyper-parameter dicts for one experiment.

    New dicts are created on every call so that anything a training routine
    might write into its config cannot leak into the next experiment.

    Args:
        option_parameters: option description dict; only
            'num_exercise_opportunities' and 'num_assets' are read here
            (they become the OSPG 'L' and 'F' entries).

    Returns:
        Tuple ``(dos_config, rrlsm_config, fqi_config, ospg_config)``.
    """
    dos_config = {
        'batch_size': 64,
        'dos_epochs': 100,
        'samples_per_epoch': 200,
        'dos_lr': 0.001,
        'clipnorm': 5,
        'omit_time_zero': True,
        'num_stacked_layers': 2,
        'units_hidden': 20,
    }

    rrlsm_config = {
        'samples_per_epoch': 200,
        'ker_std': 0.0001,
        'rec_std': 0.3,
        'include_R': True,
        'num_stacked_layers': 1,
        'units_hidden': 20,
    }

    fqi_config = {
        'batch_size': 64,
        'q_epochs': 100,
        'samples_per_epoch': 200,
        'q_lr': 0.001,
        'clipnorm': 5,
        'use_DNN': True,
        'include_R': True,
        'num_stacked_layers': 2,
        'units_hidden': 20,
    }

    ospg_config = {
        'batch_size': 64,
        'os_epochs': 100,
        'samples_per_epoch': 200,
        'os_lr': 0.001,
        'L': option_parameters['num_exercise_opportunities'],
        'F': option_parameters['num_assets'],
        'clipnorm': 5,
        'include_R': True,
        'use_DNN': True,
        'num_stacked_layers': 2,
        'units_hidden': 20,
    }

    return dos_config, rrlsm_config, fqi_config, ospg_config


def _record_fold_results(experiment_df, next_row, algorithm, option_parameters,
                         num_trajectories, result, reward_key):
    """Append one results row per fold to ``experiment_df`` (in place).

    Args:
        experiment_df: results table, extended via ``.loc`` enlargement.
        next_row: integer index label to use for the first new row.
        algorithm: label stored in the 'algorithm' column.
        option_parameters: supplies the 'num_assets' and 'current_price'
            column values.
        num_trajectories: value stored in the 'num_trajectories' column.
        result: dict returned by a training routine; indexed per fold under
            ``reward_key``, 'train_times' and 'prediction_time_per_ts'.
        reward_key: key in ``result`` that holds the per-fold rewards.

    Returns:
        The index label to use for the next row after the ones added here.
    """
    for fold in range(FOLDS):
        experiment_df.loc[next_row] = [
            algorithm,
            option_parameters['num_assets'],
            option_parameters['current_price'],
            fold,
            result[reward_key][fold],
            num_trajectories,
            result['train_times'][fold],
            result['prediction_time_per_ts'][fold],
        ]
        next_row += 1
    return next_row


def main():
    """Run the full experiment grid and write the per-fold results to CSV.

    For every combination of NUM_ASSETS x CURRENT_PRICE x NUM_TRAJECTORIES,
    trajectories are generated, passed through analyze(), and the DOS, RRLSM,
    FQI and OSPG models are trained in that order. One row per
    (algorithm, fold) is collected, and the complete table is saved to
    EXPERIMENT_FNAME after the last experiment finishes. Progress and the
    total wall-clock time in minutes are printed to stdout.
    """
    start_time = time.time()

    # Column order here must match the row layout in _record_fold_results.
    column_names = (
        'algorithm',
        'num_assets',
        'current_price',
        'fold',
        'reward',
        'num_trajectories',
        'train_time',
        'prediction_time_per_ts',
    )
    experiment_df = pd.DataFrame({column: [] for column in column_names})

    experiments = list(itertools.product(NUM_ASSETS, CURRENT_PRICE, NUM_TRAJECTORIES))
    exp_row = 0
    for exp_ctr, (num_assets, current_price, num_trajectories) in enumerate(experiments, start=1):
        option_parameters = {
            'risk_free_rate': 0.05,
            'dividend_yield': 0.1,
            'volatility_sigma': 0.2,
            'time_horizon_yrs': 3,
            'num_exercise_opportunities': 9,
            'option_strike_price': 100,
            'num_assets': num_assets,
            'current_price': current_price,
            'rhoij': 0,
            'option_type': 'bermudan_max_call',
        }

        # rs reuses the module-level SEED (same value as the former literal
        # 42) so there is a single place to change the seed.
        # NOTE(review): assumes rs is the generator's random seed -- confirm.
        X, R = generate_brownian_asset_dynamics(option_parameters, num_trajectories, rs=SEED)
        data_stats_dict = analyze(X, R, TRAIN_FRACTION, VAL_FRACTION, FOLDS)

        dos_config, rrlsm_config, fqi_config, ospg_config = _build_model_configs(option_parameters)

        # The training order (DOS, RRLSM, FQI, OSPG) is kept exactly as
        # before: the models share the globally seeded RNGs, so reordering
        # could change the numbers they produce.

        ## DOS
        dos_result = train_dos_model(dos_config, data_stats_dict, transform_str=None)
        exp_row = _record_fold_results(experiment_df, exp_row, 'DOS', option_parameters,
                                       num_trajectories, dos_result, 'dos_rewards')

        ## RRLSM
        rrlsm_result = train_rrlsm_model(rrlsm_config, data_stats_dict, transform_str=None, is_reward_flag=1)
        exp_row = _record_fold_results(experiment_df, exp_row, 'RRLSM', option_parameters,
                                       num_trajectories, rrlsm_result, 'rrlsm_rewards')

        ## FQI
        q_result = train_fqi_model(fqi_config, data_stats_dict, transform_str=None)
        exp_row = _record_fold_results(experiment_df, exp_row, 'DNN_FQI', option_parameters,
                                       num_trajectories, q_result, 'q_rewards')

        ## OSPG
        os_result = train_ospg_model(ospg_config, data_stats_dict, transform_str=None)
        exp_row = _record_fold_results(experiment_df, exp_row, 'DNN_OSPG', option_parameters,
                                       num_trajectories, os_result, 'os_rewards')

        print('done_experiment %d of %d' % (exp_ctr, len(experiments)))

    experiment_df.to_csv(EXPERIMENT_FNAME)
    end_time = time.time()
    print('run_time(min): ' + str((end_time - start_time) / 60))
    print("done")


# Run the experiment sweep only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()