"""
We implement additional hyperparameter optimization methods not present in
https://scikit-optimize.github.io/.

Gist: https://gist.github.com/Deepblue129/2c5fae9daf0529ed589018c6353c9f7b
"""

import math
import logging
import random

from tqdm import tqdm

logger = logging.getLogger(__name__)


def _random_points(dimensions, n_points, random_seed=None, test=False):
	""" Generate a random sample of points from dimensions """
	# NOTE: We supply as `randint` to `random_state`; otherwise, dimensions with the same distribution would
	# recive the same sequence of random numbers.
	# We seed `random` so the random seeds generated are deterministic.
	random.seed(random_seed)
	points = {
		d.name: d.rvs(n_samples=n_points, random_state=random.randint(0, 2**32))
		for d in dimensions
	}
	if test == True:
		points = [{k: i for k in points} for i in range(n_points) if i != 47 and i >= 10 or i == 4 or i == 5 or i == 6]
#		points = [{k: i for k in points} for i in range(n_points) if i != 47 and i >= 0 or i == 4 or i == 5 or i == 6]
	else:
		points = [{k: points[k][i] for k in points} for i in range(n_points)]
	
	return points

### functions for confidence curve

###### normal distribution

# import required libraries
from scipy.stats import norm
from scipy.stats import expon
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
 
# Creating the distribution
data = np.arange(1,10,0.01)
pdf = norm.pdf(data , loc = 5.3 , scale = 1 )
 
#Visualizing the distribution
 
'''
sb.set_style('whitegrid')
sb.lineplot(data, pdf , color = 'black')
plt.xlabel('Heights')
plt.ylabel('Probability Density')
'''

def Phi(x, mean = 0, sd = 1):
	return norm(loc = mean, scale = sd).cdf(x)

def phi(x, mean = 0, sd = 1):
	return norm(loc = mean, scale = sd).pdf(x)

def get_topk_confidence(mu_array, m, sigma, threshold=0.85):

	n = len(mu_array)
	probability = 0.0


	total_iteration = 30
	if sigma < 0.05:
		total_iteration *= 2
	y = 1.0 / total_iteration
	delta_y = 1.0 / total_iteration

	total_pdf = 0

	for iteration in range(total_iteration):

		part_1 = 0.0
		part_2 = 1.0

		y += delta_y

		for i in range(m):
#			x = (y - mu_array[i]) / sigma
			phi_x = phi(y, mu_array[i], sigma)
			Phi_x = Phi(y, mu_array[i], sigma)
#			print("[i] : phi Phi", y, phi_x, Phi_x)
			if Phi_x > 1e-120:
				part_1 += phi_x / Phi_x
			part_2 *= Phi_x
#			print(x)

		total_pdf += part_1 * part_2 * delta_y
#		print(total_pdf)

		for i in range(m, n):
#			print(i)
#			x = (y - mu_array[i]) / sigma
			part_2 *= Phi(y, mu_array[i], sigma)

		probability += part_1 * part_2 * delta_y
#		print(y, probability)

	print("probability = ", probability)
#	print("total pdf = ", total_pdf / sigma)
	print("total pdf = ", total_pdf)
	return probability

'''
### for testing
mu_array = []
mu_array = [0.28, 0.25, .24, .213, .19]
print(get_confidence_curve(mu_array, 0.2))
#print(get_topk_confidence(mu_array, 2, 0.2))
exit()
'''



def get_topk_confidence_backup(mu_array, m, sigma):

	n = len(mu_array)
	probability = 0.0

	part_1 = 0.0
	part_2 = 1.0

	total_iteration = 100
	y = 1.0 / total_iteration
	delta_y = 1.0 / total_iteration

	total_pdf = 0

	for iteration in range(total_iteration):

		y += delta_y

		for i in range(m):
			x = (y - mu_array[i]) / sigma
			phi_x = phi(x)
			Phi_x = Phi(x)
			part_1 += phi_x / Phi_x
			part_2 *= Phi_x
#			print(x)

		total_pdf += part_1 * part_2 * delta_y

		for i in range(m, n):
			print(i)
			x = (y - mu_array[i]) / sigma
			part_2 *= Phi(x)

		probability += part_1 * part_2 * delta_y

	print("probability = ", probability)
	print("total pdf = ", total_pdf)
	return probability / (sigma * 100)

def get_confidence_curve(mu_array, sigma, n_models=5, threshold=0.85):
	n_configurations = len(mu_array)
	n_configurations = min(n_configurations, n_models)

	confidence_curve = []
	for i in range(n_configurations):
		probability = get_topk_confidence(mu_array, i + 1, sigma)
		confidence_curve.append(probability)
		if probability > threshold:
			break

	return confidence_curve

'''	
print(get_confidence_curve(mu_array, 0.2))
mu_array = []
mu_array = [0.5, 0.25, .24, .213, .19]
#print(get_topk_confidence(mu_array, 1, 0.1))
exit()
'''

def get_next_round_n_model(mu_array, sigma, threshold, n_models=5):
	n_configurations = len(mu_array)
	n_configurations = min(n_configurations, n_models)
	confidence_curve = get_confidence_curve(mu_array, sigma, n_models)

	print("confidence_curve = ", confidence_curve)

	n_model = 1
	while True:
		if n_configurations == n_model or confidence_curve[n_model - 1] > threshold:
			break
		n_model += 1

	return n_model

'''
mu_array = []
mu_array = [.253, .2526, .2529, .213, .19]
print(get_next_round_n_model(mu_array, sigma = 0.01, threshold = 0.85, n_models=3))
exit()
'''

def get_sigma(total_resources_per_model):
	theta = 1
	x = total_resources_per_model / 50
	### mini simulation
#	x = total_resources_per_model - 1
	return expon.pdf(x) / 5

def successive_discarding(
		objective,
		dimensions,
		max_resources_per_round=45*3,
		total_budgets=45*3*5,
		threshold=0.9,
		initial_resources=3,
		n_models=45,
		random_seed=None,
		progress_bar=True,
		test=False):
	"""
	Successive Discarding algorithm,

	keep the best models every iteration of the `initial_models' downsamoling each time

	Args:
		objective (callable): objective function to minimize
			Named Args:
				resources (int): number of resources (e.g., epochs) to use while training model
				checkpoint (any): saved data from past run
			Returns:
				score (float): score to minimize
				checkpoint (any): saved data from run
		dimensions (list of skopt.Dimensions): list of dimensions to minimize under
		max_resources_per_round: Max number of resources (e.g. epochs) to use per round
		threshold: the confidence threshold to keep while discarding
		n_models (int): number of models to evaluate
		random_seed (int, optional): Random seed for generating hyperparameters
		progress_bar (boolean or tqdm): Iff to use or update a progress bar.
	Returns:
		scores (list of floats): Scores of the top objective executions
		hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence to scores.
	"""

	round_n_models = lambda n: max(round(n), 1)

	total_resources_per_model = 0
	total_resources = 0
	round_number = 0
	hyperparameters = _random_points(dimensions, round_n_models(n_models), random_seed, test)
	checkpoints = [None for _ in range(round_n_models(n_models))]
	scores = [0 for _ in range(round_n_models(n_models))]

	# Create a new progress bar
	remember_to_close = False
	if not isinstance(progress_bar, tqdm) and progress_bar:
		remember_to_close = True
		progress_bar = tqdm()
		setattr(progress_bar, 'stats', {'max_score': 0, 'models_evaluated': 0})

	while round_number < 5 and n_models > 1 and total_resources < total_budgets: # Or total_resources < total_budget
		round_number += 1
		# Compute the number of resources to continue running each model with
		update_n_resources = round_n_models(max_resources_per_round / n_models)

		results = []
		for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
			new_score, new_checkpoint = objective(
#				resources=update_n_resources, checkpoint=checkpoint, **params)
				resources=total_resources_per_model + update_n_resources - 1, checkpoint=checkpoint, **params)
#			print("new score", new_score)
			new_score = max(score, new_score)
			results.append(tuple([new_score, new_checkpoint]))
			if isinstance(progress_bar, tqdm):
				progress_bar.update(update_n_resources)
				if progress_bar.stats['max_score'] < new_score:
					progress_bar.stats['max_score'] = new_score
					progress_bar.set_postfix(progress_bar.stats)

		total_resources += update_n_resources * n_models
		total_resources_per_model += update_n_resources

		is_last_iteration = total_resources >= total_budgets or round_number >=5 or n_models == 1

		if not is_last_iteration:
			# Sort by minimum score `k[0][0]'
			results = sorted(zip(results, hyperparameters), key=lambda k: -k[0][0])
			# Update `hyperparameters' lists
			results, hyperparameters = zip(*results)

			mu_array = []
			mu_array = [result[0] for result in results]
			print("results ~~~ ", results)
			sigma = get_sigma(total_resources_per_model)
			print(" --- mu : ", mu_array)
			print(" --- sigma : ", sigma)
			n_models = get_next_round_n_model(mu_array, sigma, threshold, n_models)
			print(" --- n_models : ", n_models)
			models_evaluated = len(results) - n_models
		else:
			models_evaluated = len(results)

		scores, checkpoints = zip(*results)
		print("scores:", scores[: n_models])
		print("checkpoints:", checkpoints[: n_models])
		
		print(" [----] total_iterations:", total_resources_per_model)

		if isinstance(progress_bar, tqdm):
			progress_bar.stats['models_evaluated'] += models_evaluated
			progress_bar.set_postfix(progress_bar.stats)

	if remember_to_close:
		progress_bar.close()

	return scores, hyperparameters

### TEST ###
import unittest

import random
from skopt.space import Real, Integer

from lib.utils import config_logging

synthesized_accuracy = [[0.28, 0.25, .24, .213, .19],
						[.30,   .27, .26, .24,  .20],
						[.33,   .31, .29, .23,  .19],
						[.34,   .35, .31, .29,  .22],
						[.35,   .37, .32, .30,  .24]
]

def successive_halving(
		objective,
		dimensions,
		max_resources_per_model=81,
		downsample=3,  # Test random downsamples work and check boundaries
		initial_resources=3,
		n_models=45,
		random_seed=None,
		progress_bar=True):
	"""
	Adaptation of the Successive Halving algorithm.

	tl;dr keep the best models every iteration of the `initial_models` downsampling each time

	Adaptation: Instead of running for N / 2 models for 2T, we run N / 2 models for T**downsample.
	This adaptation is the same adaptation of Successive Halving in hyperband.

	Reference: http://proceedings.mlr.press/v51/jamieson16.pdf
	Reference: http://www.argmin.net/2016/06/23/hyperband/

	TODO: Splitting in half is a fairly random number. We could possibly look for a better split
	point. For example, we could use the large margin to split points. Or predict the performance
	of hyperparameters would do well in the future.

	Args:
		objective (callable): objective function to minimize
			Named Args:
				resources (int): number of resources (e.g. epochs) to use while training model
				checkpoint (any): saved data from past run
				**hyperparameters (any): hyperparameters to run
			Returns:
				score (float): score to minimize
				checkpoint (any): saved data from run
		dimensions (list of skopt.Dimensions): list of dimensions to minimize under
		max_resources_per_model: Max number of resources (e.g. epochs) to use per model
		downsample: Downsampling of models (e.g. halving is a downsampling of 2)
		initial_resources: Number of resources (e.g. epochs) to use initially to evaluate first
		  round.
		n_models (int): Number of models to evaluate
		random_seed (int, optional): Random seed for generating hyperparameters
		progress_bar (boolean or tqdm): Iff to use or update a progress bar.
	Returns:
		scores (list of floats): Scores of the top objective executions
		hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence
			to scores.
	"""
	if downsample <= 1:
		raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated' +
						 'does not grow')

	round_n_models = lambda n: max(round(n), 1)

	total_resources_per_model = 0
	hyperparameters = _random_points(dimensions, round_n_models(n_models), random_seed, test=True)
	checkpoints = [None for _ in range(round_n_models(n_models))]
	scores = [math.inf for _ in range(round_n_models(n_models))]

	print(hyperparameters)

	# Create a new progress bar
	remember_to_close = False
	if not isinstance(progress_bar, tqdm) and progress_bar:
		remember_to_close = True
		# TODO: Compute the tqdm total
		progress_bar = tqdm()
		# Keep tabs on a set of stats
		setattr(progress_bar, 'stats', {'min_score': math.inf, 'models_evaluated': 0})

	while total_resources_per_model < max_resources_per_model:
		# Compute number of resources to continue running each model with
		if total_resources_per_model == 0:
			update_n_resources = initial_resources
		else:
			update_n_resources = min(
				total_resources_per_model * downsample - total_resources_per_model,
				max_resources_per_model - total_resources_per_model)

		results = []
		print("n = ", n_models)

		cnt = 0
		for score, checkpoint, params in zip(scores, checkpoints, hyperparameters):
			new_score, new_checkpoint = objective(
				resources=update_n_resources, checkpoint=checkpoint, **params)
			new_score = min(score, new_score)
			results.append(tuple([new_score, new_checkpoint]))
			if isinstance(progress_bar, tqdm):
				progress_bar.update(update_n_resources)
				if progress_bar.stats['min_score'] < new_score:
					progress_bar.stats['min_score'] = new_score
					progress_bar.set_postfix(progress_bar.stats)
			cnt += 1

		print("cnt ---------------", cnt)

		total_resources_per_model += update_n_resources

		# NOTE: If this is not the last
		is_last_iteration = total_resources_per_model >= max_resources_per_model
		if not is_last_iteration:
			# Sort by minimum score `k[0][0]`
			results = sorted(zip(results, hyperparameters), key=lambda k: -k[0][0])
			models_evaluated = len(results) - round_n_models(n_models / downsample)
			results = results[:round_n_models(n_models / downsample)]
			# Update `hyperparameters` lists
			results, hyperparameters = zip(*results)
			n_models = n_models / downsample
		else:
			models_evaluated = len(results)

		# Update `scores` and `checkpoints` lists
		scores, checkpoints = zip(*results)

		if isinstance(progress_bar, tqdm):
			progress_bar.stats['models_evaluated'] += models_evaluated
			progress_bar.set_postfix(progress_bar.stats)

	if remember_to_close:
		progress_bar.close()

	return scores, hyperparameters

import pandas as pd

def synthesized_test():
	df = pd.read_excel('data/syn_v0.xlsx')
#print(df)

#print(df.iloc[0, 1])
#exit()

	synthesized_dimensions = [Integer(0, 4, name='integer')]
	def mock_objective(resources, integer=0, checkpoint=None):
		'''
		if checkpoint is not None:
			return checkpoint, checkpoint
		'''
		if integer == 5 or integer == 6 or integer == 9:
			integer = integer
		else:
			if integer <= 10:
				integer = 10
		return df.iloc[resources, integer], integer
		return synthesized_accuracy[resources][integer], integer

	'''
	scores, hyperparameters = successive_discarding(
			objective = mock_objective,
			dimensions = synthesized_dimensions,
			max_resources_per_round = 250,
			total_budgets = 250*5,
			threshold = 0.85,
			initial_resources=3,
			n_models=50,
			random_seed=None,
			progress_bar=True,
			test=True)
	'''
	scores, hyperparameters = successive_halving(
			objective=mock_objective,
			dimensions=synthesized_dimensions,
			max_resources_per_model=80,
			downsample=2,
			initial_resources=5,
			n_models=50,
			random_seed=None,
			progress_bar=True)

'''
from data.nasbench2.nats import get_accuracy

def objective(resources, integer=0, checkpoint=None):
	return get_accuracy(integer, resources, is_tss=True), integer

synthesized_dimensions = [Integer(0, 4, name='integer')]

scores, hyperparameters = successive_halving(
			objective=objective,
			dimensions=synthesized_dimensions,
			max_resources_per_model=80,
			downsample=2,
			initial_resources=5,
			n_models=50,
			random_seed=None,
			progress_bar=True)



print(scores, hyperparameters)
'''

#exit()



def hyperband(objective,
			  dimensions,
			  max_resources_per_model=81,
			  downsample=3,
			  total_resources=None,
			  random_seed=None,
			  progress_bar=True):
	"""
	Adaptation of the Hyperband algorithm

	tl;dr search over the space of successive halving hyperparameters

	Adaptation: Originally Hyperband was implemented with the assumption that we cannot reuse
	models. We redid the math allowing for reusing models. This is particularly helpful in speeding
	up 1 GPU hyperparameter optimization. Just to clarify, by reusing models, we mean that
	given hyperparameters `x` and epochs `y`, we can use one model to evaluate all `y` integers
	with hyperparameters `x`.

	Reference: https://arxiv.org/pdf/1603.06560.pdf 
	Reference: http://www.argmin.net/2016/06/23/hyperband/

	TODO: Implement extension to hyperband proporting an increase of x4:
	https://arxiv.org/pdf/1705.10823.pdf 
	http://www.ijcai.org/Proceedings/15/Papers/487.pdf

	Args:
		objective (callable): objective function to minimize
			Named Args:
				resources (int): number of resources (e.g. epochs) to use while training model
				checkpoint (any): saved data from past run
				**hyperparameters (any): hyperparameters to run
			Returns:
				score (float): score to minimize
				checkpoint (any): saved data from run
		dimensions (list of skopt.Dimensions): list of dimensions to minimize under
		max_resources_per_model (float): Max number of resources (e.g. epochs) to use per model
		downsample (int): Downsampling of models (e.g. halving is a downsampling of 2)
		total_resources (optional): Max number of resources hyperband is allowed to use over the
			entirety of the algorithm.
		random_seed (int, optional): Random seed for generating hyperparameters
		progress_bar (boolean, optional): Boolean for displaying tqdm
	Returns:
		scores (list of floats): Scores of the top objective executions
		hyperparameters (list of lists of dict): Hyperparameters with a one to one correspondence
			to scores.
	"""
	if downsample <= 1:
		raise ValueError('Downsample must be > 1; otherwise, the number of resources allocated' +
						 'does not grow')

	all_scores = []
	all_hyperparameters = []

	# Number of times to run hyperband
	# Ex. `max_resources_per_model = 81 and downsample = 3`
	#	 Then => initial_resources = [1, 3, 9, 27, 81]
	#	 And => `hyperband_rounds = 5`
	#	 And => `successive_halving_rounds = [5, 4, 3, 2, 1]`
	n_hyperband_rounds = math.floor(math.log(max_resources_per_model, downsample)) + 1
	if total_resources is None:
		# TODO: Multiply by the number of dimensions so it scales the number of models
		# given the large space
		total_resources_per_round = max_resources_per_model * n_hyperband_rounds
	else:
		total_resources_per_round = total_resources / n_hyperband_rounds
	total_models_evaluated = 0

	if progress_bar:
		progress_bar = tqdm(total=total_resources_per_round * n_hyperband_rounds)
		setattr(progress_bar, 'stats', {'min_score': math.inf, 'models_evaluated': 0})

	for i in reversed(range(n_hyperband_rounds)):
		n_successive_halving_rounds = i + 1

		# NOTE: Attained by running the below code on https://sandbox.open.wolframcloud.com:
		#   Reduce[Power[d, j - 1] * (x / Power[d, j]) +
		#   Sum[(Power[d, i] - Power[d, i - 1]) * (x / Power[d, i]), {i, j, k}] == e
		#   && k >=j>=1 && k>=1 && d>=1, {x}]
		# `e` is `total_resources_per_round`
		# `x` is `n_models`
		# `k - j` is `i`
		# `d` is downsample
		# The summation is similar to the successive halving rounds loop. It computes the number
		# of resources with reuse run in total. This is different from hyperband that assumes
		# no reuse.
		n_models = downsample * total_resources_per_round
		n_models /= downsample * (1 + i) - i
		n_models /= downsample**(-i + n_hyperband_rounds - 1)
		total_models_evaluated += n_models

		scores, hyperparameters = successive_halving(
			objective=objective,
			dimensions=dimensions,
			max_resources_per_model=max_resources_per_model,
			downsample=downsample,
			initial_resources=max_resources_per_model / downsample**i,
			n_models=n_models,
			random_seed=random_seed,
			progress_bar=progress_bar)
		logger.info('Finished hyperband round: %d of %d', n_hyperband_rounds - i - 1,
					n_hyperband_rounds - 1)
		all_scores.extend(scores)
		all_hyperparameters.extend(hyperparameters)

	if isinstance(progress_bar, tqdm):
		progress_bar.close()

	logger.info('Total models evaluated: %f', total_models_evaluated)
	logger.info('Total resources used: %f', total_resources_per_round * n_hyperband_rounds)
	logger.info('Total resources used per model on average: %f',
				total_models_evaluated / total_resources_per_round * n_hyperband_rounds)

	return all_scores, all_hyperparameters


### TEST ###
config_logging()

mock_dimensions = [Integer(1, 100, name='integer')]


def mock(resources, integer=0, checkpoint=None):
	# `integer` is a hyperparameter set the first batch
	if checkpoint is not None:
		return checkpoint, checkpoint
	return integer, integer


class TestHyperparameterOptimization(unittest.TestCase):

	def test_hyperband_simple(self):
		# Basic check on hyperband
		scores, hyperparameters = hyperband(objective=mock, dimensions=mock_dimensions)
		for score, hyperparameter in zip(scores, hyperparameters):
			self.assertEqual(score, hyperparameter['integer'])

	def test_successive_halving_simple(self):
		# Basic check on successive halving
		scores, hyperparameters = successive_halving(objective=mock, dimensions=mock_dimensions)
		for score, hyperparameter in zip(scores, hyperparameters):
			self.assertEqual(score, hyperparameter['integer'])

	def test_hyperband_no_progress_bar(self):
		# Basic check on hyperband
		scores, hyperparameters = hyperband(
			objective=mock, dimensions=mock_dimensions, progress_bar=False)
		for score, hyperparameter in zip(scores, hyperparameters):
			self.assertEqual(score, hyperparameter['integer'])

	def test_successive_halving_no_progress_bar(self):
		# Basic check on successive halving
		scores, hyperparameters = successive_halving(
			objective=mock, dimensions=mock_dimensions, progress_bar=False)
		for score, hyperparameter in zip(scores, hyperparameters):
			self.assertEqual(score, hyperparameter['integer'])

	def test_successive_halving_downsample(self):
		with self.assertRaises(ValueError):
			successive_halving(
				objective=mock,
				dimensions=mock_dimensions,
				progress_bar=False,
				downsample=1,
				n_models=45)


if __name__ == '__main__':
	unittest.main()
