import matplotlib
matplotlib.use('Agg')

from datasets import SVMDataset, get_dataset, get_batches
from models import TorchRewardsModel, AutoEncoder
from algorithms import train_autoencoder, train_simple_regression, evaluate_reward_model, train_optimistic_reward, train_simple_regression_full_minimization, train_pure_exploitation, DataSetWithRegressionResponses, evaluate_rank_observed_reward


import matplotlib.pyplot as plt
import IPython
import torch
from six.moves import cPickle
import pandas as pd
import ray
import numpy as np
# Start the ray runtime as soon as this module is loaded; this happens
# regardless of the value of USE_RAY below.
ray.init()


# When True, analyze_dataset dispatches its repeated experiments through the
# *_remote ray tasks; when False it calls the plain functions one after another.
USE_RAY = True




def run_experiments_regression_full_minimization(random_init, MLP, train_dataset, representation_layer_size, num_batches, batch_size, num_opt_steps, l1 = False):
	"""Run one full-minimization regression experiment on a fresh reward model.

	A new TorchRewardsModel is built from ``train_dataset.dimension`` and the
	given architecture options, trained with
	``train_simple_regression_full_minimization``, and every entry of the
	returned ``max_observed_rewards`` is converted to a rank with
	``evaluate_rank_observed_reward``.

	Returns:
		``(ranks, max_observed_rewards)``; ``ranks`` has one entry per entry of
		``max_observed_rewards``.
	"""
	model = TorchRewardsModel(
		random_init = random_init,
		MLP = MLP,
		dim = train_dataset.dimension,
		representation_layer_size = representation_layer_size,
	)
	print("Starting training")

	# Only the reward trace is used below; the trained model is discarded.
	_, max_observed_rewards = train_simple_regression_full_minimization(
		model,
		train_dataset,
		num_batches = num_batches,
		batch_size = batch_size,
		num_opt_steps = num_opt_steps,
		l1 = l1,
	)

	ranks = []
	for observed_reward in max_observed_rewards:
		ranks.append(evaluate_rank_observed_reward(train_dataset, observed_reward))

	print("Max observed rewards - full minimization", max_observed_rewards)
	print("Ranks ", ranks)
	print("L1 {}".format(l1))
	return ranks, max_observed_rewards



@ray.remote
def run_experiments_regression_full_minimization_remote(random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps, l1 = False):
	"""Ray task wrapper: forward every argument to run_experiments_regression_full_minimization and return its result."""
	forwarded = (random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps)
	return run_experiments_regression_full_minimization(*forwarded, l1 = l1)



def run_experiments_optimistic_reward(random_init, MLP, train_dataset, representation_layer_size, num_batches, batch_size, num_opt_steps, lambda_reward_max, l1 = False):
	"""Run one optimistic-reward experiment on a fresh reward model.

	A new TorchRewardsModel is trained with ``train_optimistic_reward`` using
	the given ``lambda_reward_max``, and every entry of the returned
	``max_observed_rewards`` is converted to a rank with
	``evaluate_rank_observed_reward``.

	Returns:
		``(ranks, max_observed_rewards, model_fitting_losses)``.
	"""
	model = TorchRewardsModel(
		random_init = random_init,
		MLP = MLP,
		dim = train_dataset.dimension,
		representation_layer_size = representation_layer_size,
	)

	# The trained model (first element) is not needed after training.
	training_output = train_optimistic_reward(
		model, train_dataset, num_batches, num_opt_steps, batch_size,
		lambda_reward_max = lambda_reward_max,
		verbose = False,
		l1 = l1,
	)
	_, max_observed_rewards, model_fitting_losses = training_output

	ranks = []
	for observed_reward in max_observed_rewards:
		ranks.append(evaluate_rank_observed_reward(train_dataset, observed_reward))

	print("Max observed rewards optimistic rewards - {}".format(lambda_reward_max), max_observed_rewards)
	print("Ranks ", ranks)
	print("L1 {} - ".format(l1))
	print("lambda - {}".format(lambda_reward_max))
	return ranks, max_observed_rewards, model_fitting_losses


@ray.remote
def run_experiments_optimistic_reward_remote(random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps, lambda_reward_max, l1 = False):
	"""Ray task wrapper: forward every argument to run_experiments_optimistic_reward and return its result."""
	forwarded = (random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps, lambda_reward_max)
	return run_experiments_optimistic_reward(*forwarded, l1 = l1)




def run_experiments_greedy(random_init, MLP, train_dataset, representation_layer_size, num_batches, batch_size, num_opt_steps, l1 = False):
	"""Run one greedy (pure exploitation) experiment on a fresh reward model.

	A new TorchRewardsModel is trained with ``train_pure_exploitation`` (always
	called with ``lambda_reward_max = .1``), and every entry of the returned
	``max_observed_rewards`` is converted to a rank with
	``evaluate_rank_observed_reward``.

	Returns:
		``(ranks, max_observed_rewards)``.
	"""
	model = TorchRewardsModel(
		random_init = random_init,
		MLP = MLP,
		dim = train_dataset.dimension,
		representation_layer_size = representation_layer_size,
	)

	# The trained model (first element) is not needed after training.
	training_output = train_pure_exploitation(
		model, train_dataset, num_batches, num_opt_steps, batch_size,
		lambda_reward_max = .1,
		verbose = False,
		l1 = l1,
	)
	_, max_observed_rewards = training_output

	ranks = []
	for observed_reward in max_observed_rewards:
		ranks.append(evaluate_rank_observed_reward(train_dataset, observed_reward))

	print("Max observed rewards greedy ", max_observed_rewards)
	print("Ranks ", ranks)
	print("L1 {}".format(l1))

	return ranks, max_observed_rewards


@ray.remote
def run_experiments_greedy_remote(random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps, l1 = False):
	"""Ray task wrapper: forward every argument to run_experiments_greedy and return its result."""
	forwarded = (random_init, MLP, train_dataset, representation_layer_size, nums_batches, batch_size, num_opt_steps)
	return run_experiments_greedy(*forwarded, l1 = l1)




def autoencoder_analysis():	
	"""Compute an AutoEncoder's loss on one evaluation batch before and after training.

	NOTE(review): ``unsupervised_dataset``, ``random_init``, ``MLP``,
	``representation_layer_size`` and ``batch_size`` are neither parameters nor
	assigned inside this function, and no module-level definition of them is
	visible in this file, so calling it as written looks like it would raise
	NameError -- confirm before use.  Both losses are computed but never
	returned or printed.
	"""
	num_steps = 100
	# Very large batch-size request; presumably meant to pull the whole dataset
	# as a single evaluation batch -- TODO confirm against get_batches.
	eval_batch = get_batches(unsupervised_dataset, 1000000000)
	autoencoder = AutoEncoder(random_init = random_init, MLP = MLP, dim = unsupervised_dataset.dimension, representation_layer_size = representation_layer_size )
	# Loss of the freshly constructed model, before any training step.
	initial_eval_loss = autoencoder.get_loss(eval_batch)
	autoencoder = train_autoencoder(autoencoder, unsupervised_dataset, num_steps = num_steps, batch_size = batch_size)
	# Loss on the same batch after training.
	eval_loss = autoencoder.get_loss(eval_batch)



def _run_repeated_experiments(local_fn, remote_fn, num_experiments, args, kwargs):
	"""Run one experiment ``num_experiments`` times and return the list of results.

	Uses ``remote_fn`` (a ray remote function) when the module-level ``USE_RAY``
	flag is set, otherwise calls ``local_fn`` sequentially in this process.
	"""
	if USE_RAY:
		pending = [remote_fn.remote(*args, **kwargs) for _ in range(num_experiments)]
		return ray.get(pending)
	return [local_fn(*args, **kwargs) for _ in range(num_experiments)]


def _summarize_runs(all_data):
	"""Turn a list of per-run result tuples into one ``(mean, std)`` pair per tuple position.

	Statistics are taken across runs (axis 0).  The first position (ranks) is
	returned as computed; every later position is passed through ``np.squeeze``.
	"""
	summaries = []
	for position, per_run_values in enumerate(zip(*all_data)):
		stacked = np.array(per_run_values)
		mean, std = np.mean(stacked, 0), np.std(stacked, 0)
		if position > 0:
			mean, std = np.squeeze(mean), np.squeeze(std)
		summaries.append((mean, std))
	return summaries


def _run_optimistic_sweep(lambdas, num_experiments, experiment_args, l1):
	"""Run the optimistic-reward experiment for every lambda; return one ``_summarize_runs`` result per lambda."""
	sweep = []
	for lambda_reward_max in lambdas:
		all_data = _run_repeated_experiments(
			run_experiments_optimistic_reward, run_experiments_optimistic_reward_remote,
			num_experiments, experiment_args,
			{"lambda_reward_max": lambda_reward_max, "l1": l1})
		sweep.append(_summarize_runs(all_data))
	return sweep


def _optimism_curves(sweep, position, lambdas, colors, label_template):
	"""Build ``(mean, std, color, label)`` plot entries from one tuple position of every lambda summary."""
	curves = []
	for summary, lambda_reward_max, color in zip(sweep, lambdas, colors):
		mean, std = summary[position]
		curves.append((mean, std, color, label_template.format(lambda_reward_max)))
	return curves


def _save_histogram(values, bins, path):
	"""Save a histogram of ``values`` to ``path`` and close the figure."""
	plt.hist(values, bins = bins)
	plt.savefig(path)
	plt.close("all")


def _save_curves_plot(path, title, ylabel, num_batches, curves, baseline = None, ylim = None):
	"""Plot mean curves with a shaded band of +/- 0.1 * std against batch index and save to ``path``.

	``curves`` is a sequence of ``(mean, std, color, label)``.  When ``baseline``
	is given, a constant black "baseline" line is added; ``ylim`` optionally
	fixes the y-axis range.
	"""
	x_values = np.arange(num_batches) + 1
	plt.xlabel("Num Batches")
	plt.ylabel(ylabel)
	plt.title(title)
	for mean, std, color, label in curves:
		plt.plot(x_values, mean, linewidth = 3, color = color, label = label)
		plt.fill_between(x_values, mean - .1*std, mean + .1*std, color = color, alpha = .2)
	if baseline is not None:
		plt.plot(x_values, [baseline]*num_batches, color = "black", label = "baseline")
	plt.legend(loc="upper left")
	if ylim is not None:
		plt.ylim(ylim)
	plt.savefig(path)
	plt.close("all")


def analyze_dataset(dataset_name, representation_layer_size, batch_size, num_batches,  MLP = True, random_init = True, num_opt_steps = 5000, num_experiments = 2, fit_regression_responses = True):
	"""Run the greedy, full-regression and optimistic experiments on one dataset and save plots under ./results.

	Every method is run with ``l1 = False`` and ``l1 = True``, each repeated
	``num_experiments`` times (through ray when ``USE_RAY`` is set).  Mean and
	standard deviation across runs are plotted against batch index
	1..num_batches; the shaded bands are mean +/- 0.1 * std.

	The l1 figures draw the l1 results for every method.  Earlier versions of
	this function plotted the non-l1 full-regression and non-l1 optimistic
	curves in several l1 figures while labelling them as l1.

	Args:
		dataset_name: name passed to ``get_dataset``; also used in titles and file names.
		representation_layer_size: forwarded to the models and used in file names.
		batch_size: forwarded to the training routines and used in file names.
		num_batches: number of batches per run; also the length of the x axis.
		MLP, random_init: forwarded to the model constructors.
		num_opt_steps: forwarded to the training routines.
		num_experiments: number of repetitions per method/configuration.
		fit_regression_responses: when True, the training set is wrapped in
			``DataSetWithRegressionResponses`` before any analysis.
	"""
	import os

	# Only the first element returned by get_dataset is used here.
	train_dataset, _, _ = get_dataset(dataset_name, 30, 10)

	if fit_regression_responses:
		train_dataset = DataSetWithRegressionResponses(train_dataset, MLP= MLP,
			representation_layer_size = representation_layer_size,
			num_steps = 10000,
			batch_size = 20)

	name_identifier = "{}_regresponses_{}_batchsize_{}_MLP_{}_layersize_{}".format(dataset_name, fit_regression_responses, batch_size, MLP,representation_layer_size)

	# Every figure is written to ./results; create it up front so a missing
	# directory cannot abort the run at a savefig call after training.
	if not os.path.isdir("./results"):
		os.makedirs("./results")

	print("Plot Histogram {}".format(dataset_name))

	### DATASET ANALYSIS
	_, all_y = get_batches(train_dataset, 1000000000000000)

	_save_histogram(all_y, 50, "./results/histogram_y_responses_{}_regresponses_{}.png".format(dataset_name, fit_regression_responses))

	all_y = np.squeeze(all_y)
	# Largest response in the training data; drawn as the "baseline" line in the reward plots.
	baseline = np.max(all_y)

	sorted_all_y = np.sort(all_y)
	_save_histogram(sorted_all_y[-100:], 20, "./results/top100histogram_y_responses_{}_regresponses_{}.png".format(dataset_name, fit_regression_responses))
	_save_histogram(sorted_all_y[-1000:], 20, "./results/top1000histogram_y_responses_{}_regresponses_{}.png".format(dataset_name, fit_regression_responses))

	# Positional arguments shared by every experiment function.
	experiment_args = (random_init, MLP, train_dataset, representation_layer_size, num_batches, batch_size, num_opt_steps)

	print("#########################################################################")
	print("                               Starting greedy training {}".format(dataset_name))
	print("########################################################################")

	greedy_ranks, greedy_rewards = _summarize_runs(_run_repeated_experiments(
		run_experiments_greedy, run_experiments_greedy_remote,
		num_experiments, experiment_args, {"l1": False}))

	print("############################################################")
	print("                      Starting greedy l1 training {}".format(dataset_name))
	print("############################################################")

	greedy_l1_ranks, greedy_l1_rewards = _summarize_runs(_run_repeated_experiments(
		run_experiments_greedy, run_experiments_greedy_remote,
		num_experiments, experiment_args, {"l1": True}))

	print("############################################################")
	print("                  Starting full regression training {}".format(dataset_name))
	print("############################################################")

	fullreg_ranks, fullreg_rewards = _summarize_runs(_run_repeated_experiments(
		run_experiments_regression_full_minimization, run_experiments_regression_full_minimization_remote,
		num_experiments, experiment_args, {"l1": False}))

	print("############################################################")
	print("                  Starting full regression training {} l1".format(dataset_name))
	print("############################################################")

	fullreg_l1_ranks, fullreg_l1_rewards = _summarize_runs(_run_repeated_experiments(
		run_experiments_regression_full_minimization, run_experiments_regression_full_minimization_remote,
		num_experiments, experiment_args, {"l1": True}))

	print("####################################################################")
	print("                             Starting optimistic training {}".format(dataset_name))
	print("####################################################################")

	lambdas = [0, .0001, .001, .01]
	colors = ["brown", "violet", "orange", "green"]

	# Each sweep entry is [ranks, rewards, model-fit losses], each a (mean, std) pair.
	optimistic_sweep = _run_optimistic_sweep(lambdas, num_experiments, experiment_args, False)

	print("###########################################################################")
	print("                                  Starting optimistic l1 training {}".format(dataset_name))
	print("###########################################################################")

	optimistic_l1_sweep = _run_optimistic_sweep(lambdas, num_experiments, experiment_args, True)

	print("############################################################")
	print("               plotting the l2 loss plots                   ")
	print("############################################################")

	rank_curves = [
		greedy_ranks + ("red", "Greedy"),
		fullreg_ranks + ("blue", "Full Regression"),
	] + _optimism_curves(optimistic_sweep, 0, lambdas, colors, "Optimism Lambda {}")
	reward_curves = [
		greedy_rewards + ("red", "Greedy"),
		fullreg_rewards + ("blue", "Full Regression"),
	] + _optimism_curves(optimistic_sweep, 1, lambdas, colors, "Optimism Lambda {}")

	_save_curves_plot("./results/ranks_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Ranks - close up {}".format(dataset_name), "Resulting Ranks", num_batches, rank_curves, ylim = [0,30])
	_save_curves_plot("./results/ranksfull_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Ranks {}".format(dataset_name), "Resulting Ranks", num_batches, rank_curves)
	_save_curves_plot("./results/rewards_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Rewards {}".format(dataset_name), "Resulting Rewards", num_batches, reward_curves, baseline = baseline)

	print("############################################################")
	print("               plotting the l1 loss plots                   ")
	print("############################################################")

	# All l1 figures use the l1 results of every method.
	rank_l1_curves = [
		greedy_l1_ranks + ("red", "Greedy l1"),
		fullreg_l1_ranks + ("blue", "Full Regression l1"),
	] + _optimism_curves(optimistic_l1_sweep, 0, lambdas, colors, "Optimism Lambda {} l1")
	reward_l1_curves = [
		greedy_l1_rewards + ("red", "Greedy l1"),
		fullreg_l1_rewards + ("blue", "Full Regression l1"),
	] + _optimism_curves(optimistic_l1_sweep, 1, lambdas, colors, "Optimism Lambda {} l1")

	_save_curves_plot("./results/ranks_l1_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Ranks - close up {} - l1".format(dataset_name), "Resulting Ranks", num_batches, rank_l1_curves, ylim = [0,30])
	_save_curves_plot("./results/ranksfull_l1_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Ranks {} - l1".format(dataset_name), "Resulting Ranks", num_batches, rank_l1_curves)
	_save_curves_plot("./results/rewards_l1_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Rewards {} - l1".format(dataset_name), "Resulting Rewards", num_batches, reward_l1_curves, baseline = baseline)

	print("############################################################")
	print("               plotting the l2 model fit loss plots                   ")
	print("############################################################")

	_save_curves_plot("./results/model_fit_loss_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Model Fit {}".format(dataset_name), "Model Fit Loss", num_batches,
		_optimism_curves(optimistic_sweep, 2, lambdas, colors, "Optimism Lambda {}"))

	print("############################################################")
	print("               plotting the l1 model fit loss plots                   ")
	print("############################################################")

	_save_curves_plot("./results/model_fit_loss_l1_{}_numbatch_{}.png".format(name_identifier, num_batches),
		"Model Fit {} l1".format(dataset_name), "Model Fit Loss", num_batches,
		_optimism_curves(optimistic_l1_sweep, 2, lambdas, colors, "Optimism Lambda {}"))






def main():
	"""Run analyze_dataset for every dataset, batch size and response mode.

	The number of batches per run is the whole number of batches of
	``batch_size`` that fit into ``total_num_datapoints``.  Batch sizes larger
	than that budget would give zero batches, so they are skipped with a
	message instead of being run.

	A leftover debugging ``raise ValueError`` used to abort the sweep right
	after the first dataset; it has been removed so all datasets are analysed.
	"""
	representation_layer_size = 10
	total_num_datapoints = 5
	num_experiments = 10
	dataset_names = ["MultiSVM", "BlogFeedback", "BikeSharingDay", "BikeSharingHour", "Adult", "Bank"]

	for batch_size in [3, 10]:
		num_batches = total_num_datapoints // batch_size
		if num_batches < 1:
			print("Skipping batch_size {}: larger than total_num_datapoints {}".format(batch_size, total_num_datapoints))
			continue
		for fit_regression_responses in [True, False]:
			for dataset_name in dataset_names:
				analyze_dataset(dataset_name = dataset_name, representation_layer_size = representation_layer_size, batch_size = batch_size, num_batches = num_batches, fit_regression_responses = fit_regression_responses, num_experiments = num_experiments)


# Run the experiment sweep only when this file is executed as a script.
if __name__ == "__main__":

	main()



