import numpy as np

class EXP3(object):
	"""Loss-based EXP3 learner over ``K`` arms.

	Attributes:
		K: number of arms.
		w: unnormalised arm weights ``exp(L)``; the sampling distribution
			is ``w / sum(w)``.
		L: per-arm log-weights, i.e. ``-eta`` times the accumulated
			importance-weighted loss, shifted so the largest entry is 0.
		eta: learning rate ``sqrt(2*log(K) / (T*K))``.
		at: arm returned by the most recent ``select_arm`` call (only set
			once ``select_arm`` has been called).
	"""

	def __init__(self, K, T):
		"""Start with uniform weights over ``K`` arms for a horizon of ``T`` rounds."""
		self.K = K
		self.w = np.ones(K)
		self.L = np.zeros(K)
		self.eta = np.sqrt(2*np.log(K)/(T*K))

	def select_arm(self):
		"""Sample an arm index with probability ``w / sum(w)`` and remember it in ``self.at``."""
		p = self.w / np.sum(self.w)
		self.at = np.random.choice(np.arange(self.K), p=p)
		return self.at

	def update_weight(self, at, lt):
		"""Fold the observed loss ``lt`` of the pulled arm ``at`` into the weights.

		Only the pulled arm's log-weight changes; all log-weights are then
		re-centred and ``self.w`` is recomputed from them.
		"""
		# The loss estimate must be weighted by the probability the arm was
		# sampled with (the normalised weight), not by the raw weight whose
		# scale is arbitrary.
		p_at = self.w[at] / np.sum(self.w)
		self.L[at] -= self.eta * lt / p_at
		# Shift so the largest log-weight is 0: every exponent is then <= 0,
		# so np.exp can underflow harmlessly to 0 but never overflow to inf.
		# The sampling distribution is unaffected by the shift.
		self.L -= np.max(self.L)
		self.w = np.exp(self.L)

class EXPRB(object):
	"""Exponential-weights learner with uniform exploration and a bonus term scaled by ``Phi``.

	Attributes:
		K: number of arms.
		w: unnormalised arm weights ``exp(R)``.
		q: per-arm reference probability, starting at 1 and lowered in
			``update_weight`` whenever the arm's current probability drops below it.
		R: per-arm log-weights, shifted so the largest entry is 0.
		eta: used both as the uniform-exploration mixing rate and as the
			step size of the log-weight update.
		gamma: the ``Phi`` argument; scales the bonus and the decay of ``q``.
	"""

	def __init__(self, K, T, Phi):
		"""Initialise state for ``K`` arms, a horizon of ``T`` rounds and bonus scale ``Phi``."""
		self.K = K
		self.w = np.ones(K)
		self.q = np.ones(K)
		self.R = np.zeros(K)
		# NOTE(review): the denominator uses e**2 - 1; confirm this constant
		# against the reference algorithm.
		horizon_term = T * (np.exp(2) - 1)
		self.eta = np.sqrt(K * np.log(K) / horizon_term)
		self.gamma = Phi

	def select_arm(self):
		"""Sample an arm from the weight distribution mixed with uniform exploration."""
		exploit = (1 - self.eta) * self.w / np.sum(self.w)
		explore = self.eta / self.K
		self.at = np.random.choice(np.arange(self.K), p=exploit + explore)
		return self.at

	def update_weight(self, at, lt):
		"""Update the log-weight of arm ``at`` from its observed loss ``lt``.

		The loss is turned into a reward ``1 - lt``. When the arm's current
		probability is below its reference ``q[at]``, a bonus capped at 1 is
		added to the reward and ``q[at]`` is lowered.
		"""
		reward = 1 - lt
		probs = self.w / np.sum(self.w)
		p_at = probs[at]
		q_at = self.q[at]
		if p_at < q_at:
			bonus = min(self.gamma * (1 - p_at / q_at), 1)
			# The reference never falls below the current probability.
			self.q[at] = max(p_at, (1 - 1.0 / self.gamma) * q_at)
		else:
			bonus = 0
		self.R[at] += self.eta * (reward + bonus) / (p_at * self.K)
		# Re-centre so the largest log-weight is 0 before exponentiating.
		self.R -= max(self.R)
		self.w = np.exp(self.R)


class attacker(object):
	"""Adversary that rewrites observed losses around a designated arm ``ad``."""

	def __init__(self, ad, alpha, eps):
		"""Store the designated arm ``ad`` and the exponents ``alpha`` and ``eps`` used by ``HardAttack``."""
		self.ad, self.alpha, self.eps = ad, alpha, eps

	def EasyAttack(self, lt, at):
		"""Return ``lt`` unchanged when ``at`` is the designated arm, otherwise 1."""
		if at != self.ad:
			return 1
		return lt

	def HardAttack(self, lt, at, t):
		"""Return 1 for any non-designated arm; otherwise ``lt`` capped at ``1 - t**(alpha+eps-1)``."""
		if at != self.ad:
			return 1
		# Round-dependent ceiling applied to the designated arm's loss.
		ceiling = 1.0 - t ** (self.alpha + self.eps - 1)
		return min(ceiling, lt)

# Script entry-point guard: running this file directly currently does nothing.
if __name__ == "__main__":
	pass