import numpy as np
from copy import deepcopy

class RandomJointPolicy(object):
	"""Randomly generated, memoised policy keyed by information state.

	The first time an infostate is queried a random distribution over its
	legal actions is drawn and stored; later queries for the same infostate
	return the stored distribution. One randomly chosen action receives
	probability ``policy_bias`` and the remaining mass is split randomly
	among the other actions.
	"""

	def __init__(self, policy_bias=0.8):
		# Maps str(infostate) -> numpy array of action probabilities.
		self._action_probs = {}
		# Probability assigned to the single favoured action of each infostate.
		self._policy_bias = policy_bias

	def get_action_probabilities(self, infostate, player, legal_actions):
		"""Return the action probabilities stored for ``infostate``.

		Args:
			infostate: Any object; its ``str()`` form is used as the cache key.
			player: Accepted for interface compatibility; not used here.
			legal_actions: Sized collection of actions; only its length is
				used, and only the first time the infostate is seen.

		Returns:
			A numpy array with one probability per legal action. The cached
			array itself is returned (not a copy).

		Raises:
			ValueError: If a new infostate is queried with no legal actions.
		"""
		key = str(infostate)
		if key not in self._action_probs:
			self._action_probs[key] = self._generate_random_policy_for_infostate(len(legal_actions))
		return self._action_probs[key]

	def _generate_random_policy_for_infostate(self, num_actions):
		"""Draw a random distribution over ``num_actions`` actions.

		Raises:
			ValueError: If ``num_actions`` is smaller than 1.
		"""
		if num_actions < 1:
			raise ValueError("cannot build a policy over %d legal actions" % num_actions)
		# Both draws are made up front, in this order, so the global numpy
		# random stream is consumed the same way for every action count.
		bias_index = np.random.randint(num_actions)
		policy = np.random.rand(num_actions)
		if num_actions == 1:
			# A single action always gets the full mass; avoids 0/0 when the
			# random draw happens to be exactly 0.0.
			return np.ones(1)
		policy[bias_index] = 0.
		remainder = np.sum(policy)
		if remainder > 0.:
			policy = (policy / remainder) * (1. - self._policy_bias)
		else:
			# Every non-favoured draw was exactly 0.0: share the remaining
			# mass evenly rather than dividing by zero and caching NaNs.
			policy[:] = (1. - self._policy_bias) / (num_actions - 1)
		policy[bias_index] = self._policy_bias
		return policy

class RLPolicyWrapper(object):
	"""Adapter that exposes an agent through ``get_action_probabilities``."""

	def __init__(self, agent):
		# The wrapped object is expected to provide
		# ``action_probabilities(infostate, player, legal_actions)``.
		self._agent = agent

	def get_action_probabilities(self, infostate, player, legal_actions):
		"""Forward the query to the wrapped agent and return its answer unchanged."""
		query = self._agent.action_probabilities
		return query(infostate, player, legal_actions)


def expected_value(state, policy):
	"""Compute the expected score of ``state`` when play follows ``policy``.

	Terminal states contribute ``state.score()``. Otherwise every legal
	action of the player to move is tried on a deep copy of the state and
	the resulting values are averaged, weighted by the probabilities that
	``policy.get_action_probabilities`` returns for the mover's infostate.
	The passed-in ``state`` is only ever played on through copies.
	"""
	if state.terminal():
		return state.score()

	mover = state.get_player_to_move()
	actions = state.get_legal_actions(mover)
	total = 0.
	weights = policy.get_action_probabilities(state.get_infostate(mover), mover, actions)
	for position, action in enumerate(actions):
		# Explore each branch on its own copy so siblings start from the same state.
		successor = deepcopy(state)
		successor.play(mover, action)
		weight = weights[position]
		total += weight * expected_value(successor, policy)
	return total

# works for values between [0, num_features)
def one_hot(value, num_features):
	"""Return a one-hot encoding of ``value`` as a length-``num_features`` array.

	Args:
		value: Index to mark; converted with ``int()`` before indexing.
			``None`` or a negative value produces an all-zero vector.
		num_features: Length of the returned vector.

	Returns:
		A numpy float array of zeros with a 1.0 at position ``int(value)``.

	Raises:
		IndexError: If ``int(value)`` is not smaller than ``num_features``.
	"""
	ret = np.zeros(num_features)
	# Compare against None explicitly: a plain truthiness test would treat
	# the valid index 0 as "no value" and leave the vector empty.
	if value is not None and value >= 0:
		ret[int(value)] += 1.
	return ret
