import os
import subprocess
import utils

def experiment(experiment_name, env, method, runs, epochs, beta, lookahead,
               new_data_train_threshold=None, max_buffer_length=None, cold_start=None, save=None, loaddir=None,
               maneuver=None):
    """Launch ``8_online.py`` once per run and then record the hyperparameters.

    Every run is started as a subprocess with its log directory set to
    ``Experiments/<env>/<method>/REWORK/<experiment_name>/run<i>``. After the
    last run the hyperparameter dict is handed to ``utils.to_json`` together
    with the path ``.../<experiment_name>/hyperparameters.txt``.

    Args:
        experiment_name: Name of the experiment's output sub-directory.
        env: Value passed to ``-env``; also part of the output path.
        method: Value passed to ``--METHOD``; also part of the output path.
        runs: Number of subprocess runs to launch.
        epochs: Value passed after ``-o exp_cfg.exp_cfg.ntrain_iters``.
        beta: Value passed to ``--S_FUT_KL_CST``.
        lookahead: Value passed to ``--S_FUT_LASTEPS``.
        new_data_train_threshold: Value for ``--NEW_DATA_TRAIN_THRESHOLD``;
            a falsy value is sent as ``"0"``.
        max_buffer_length: Value for ``--MAX_BUFFER_LENGTH``; a falsy value
            is sent as ``"10e15"``.
        cold_start: Value for ``--COLD_START_STEPS``; a falsy value is sent
            as ``"0"``.
        save: When truthy, the ``--SAVE`` flag is appended.
        loaddir: When not ``None``, ``--LOAD <loaddir>`` is appended.
        maneuver: When not ``None``, ``--MANEUVER <maneuver>`` is appended.

    Raises:
        AssertionError: If the ``run0`` directory of this experiment already
            exists, to avoid overwriting earlier results.
    """
    HYPERPARAMETERS = {
        'RUNS': str(runs),
        'EPOCHS': str(epochs),
        'NEW_DATA_TRAIN_THRESHOLD': str(new_data_train_threshold) if new_data_train_threshold else "0",
        'METHOD': method,
        'BETA': str(beta),
        'LOOKAHEAD': str(lookahead),
        'ENV': env,
        'MAX_BUFFER_LENGTH': str(max_buffer_length) if max_buffer_length else "10e15",
        'COLD_START_STEPS': str(cold_start) if cold_start else "0",
        'SAVE': save,
        'LOADDIR': loaddir,
        'MANEUVER': maneuver,
    }

    # Built once so the guard, the per-run log dirs and the JSON path agree.
    experiment_dir = f"Experiments/{env}/{method}/REWORK/{experiment_name}"

    # Raised explicitly (rather than via `assert`) so the overwrite guard is
    # still enforced under `python -O`; the exception type is kept as-is.
    if os.path.exists(f"{experiment_dir}/run0"):
        raise AssertionError(
            "Output directory already exists, ensure you are not overwriting experiment data."
        )

    for i in range(int(HYPERPARAMETERS['RUNS'])):
        command = [
            "python", "8_online.py", "-env", env,
            "-logdir", f"{experiment_dir}/run{i}",
            "-o", "exp_cfg.exp_cfg.ntrain_iters", HYPERPARAMETERS['EPOCHS'],
            "--NEW_DATA_TRAIN_THRESHOLD", HYPERPARAMETERS['NEW_DATA_TRAIN_THRESHOLD'],
            "--METHOD", method,
            "--S_FUT_KL_CST", HYPERPARAMETERS['BETA'],
            "--S_FUT_LASTEPS", HYPERPARAMETERS['LOOKAHEAD'],
            "--MAX_BUFFER_LENGTH", HYPERPARAMETERS['MAX_BUFFER_LENGTH'],
            "--COLD_START_STEPS", HYPERPARAMETERS['COLD_START_STEPS'],
        ]
        # A None entry in the argument list makes subprocess.run raise
        # TypeError, so the optional maneuver is only passed when supplied.
        if maneuver is not None:
            command += ["--MANEUVER", maneuver]
        if save:
            command.append("--SAVE")
        if loaddir is not None:
            command += ["--LOAD", loaddir]

        # Echo the command on its own line and flush so it appears before
        # any output produced by the child process.
        print(" ".join(str(arg) for arg in command), flush=True)
        completed = subprocess.run(command)
        if completed.returncode != 0:
            # Keep going with the remaining runs, but do not hide the failure.
            print(
                f"Warning: run{i} of experiment '{experiment_name}' exited with code {completed.returncode}",
                flush=True,
            )

    utils.to_json(HYPERPARAMETERS, f"{experiment_dir}/hyperparameters.txt")

experiment_queue = [
    # Each entry is the positional argument list for experiment(), in this order:
    #   [experiment_name, environment, method, num_runs, num_epochs, beta, lookahead,
    #    new_data_train_threshold, max_buffer_length, cold_start_steps, save_flag,
    #    model_load_directory, maneuver]
    # Append a new list here to queue another experiment.

    # Experiment 1: simple environments, training past convergence.
    ["Sector1UnlimitedBuffer", "pointbot", "BASELINE", 1, 100, 0.5, 10, 0.01, None, None, True, None, 'sector_1'],
    ["Sector1UnlimitedBuffer", "pointbot", "UARF", 1, 100, 0.5, 10, 0.01, None, None, True, None, 'sector_1'],
    ["Sector1UnlimitedBuffer", "pointbot", "BICHO", 1, 100, 0.5, 10, 0.01, None, None, True, None, 'sector_1'],

    ["CartpoleUnlimitedBuffer", "cartpole", "BASELINE", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["CartpoleUnlimitedBuffer", "cartpole", "UARF", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["CartpoleUnlimitedBuffer", "cartpole", "BICHO", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],

    ["ReacherUnlimitedBuffer", "reacher", "BASELINE", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["ReacherUnlimitedBuffer", "reacher", "BICHO", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["ReacherUnlimitedBuffer", "reacher", "UARF", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],

    ["PusherUnlimitedBuffer", "pusher", "UARF", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["PusherUnlimitedBuffer", "pusher", "BICHO", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
    ["PusherUnlimitedBuffer", "pusher", "BASELINE", 1, 100, 0.005, 10, 0.01, None, None, True, None, 'sector_1'],
]

# Run the queued experiments one after another, in list order.
for queued_args in experiment_queue:
    experiment(*queued_args)
