import numpy as np

def unsummarized_history(hist):
    """Render the complete play history as prompt text, one line per round.

    Each entry of ``hist`` is read positionally: element 0 is the button
    that was pressed and element 1 is the reward that was received.

    Returns a header line stating how many rounds have been played,
    followed by one ``"<button> button, reward <reward>"`` line per entry.
    Every line, including the header, ends with a newline.
    """
    header = f"So far you have played {len(hist)} times with the following choices and rewards:\n"
    rounds = [f"{entry[0]} button, reward {entry[1]}\n" for entry in hist]
    return header + "".join(rounds)

def summarized_history(hist, colors, k):
    """Render the play history as per-button summary statistics.

    Each entry of ``hist`` is read positionally: element 0 is the button
    that was pressed and element 1 is the reward that was received. Only
    the first ``k`` entries of ``colors`` are summarized, in order.

    Returns a header line stating how many rounds have been played,
    followed by one line per button. A button that was pressed at least
    once reports its press count and its average reward rounded to one
    decimal place; a button that was never pressed reports only the
    (zero) press count. Every line ends with a newline.
    """
    lines = [
        f"So far you have played {len(hist)} times with your past choices and rewards summarized as follows:\n"
    ]
    for idx in range(k):
        color = colors[idx]
        rewards = [entry[1] for entry in hist if entry[0] == color]
        presses = len(rewards)
        if presses == 0:
            # No average can be reported for a button that was never pressed.
            lines.append(f"{color} button: pressed {presses} times\n")
            continue
        mean_reward = np.sum(rewards) / presses
        lines.append(
            f"{color} button: pressed {presses} times with average reward {mean_reward:.1f}\n"
        )
    return "".join(lines)

class OldButtonsPrompt:
    """Prompt template that frames a bandit problem as pressing colored buttons.

    The template builds a text prompt describing ``K`` buttons with
    Bernoulli rewards over ``T`` time steps, appends the play history
    (raw or summarized), and asks for the next button. Chain-of-thought
    is not supported: requesting it raises an exception.
    """

    def __init__(self, T, K, suggestive=False, summarized=False, cot=False, dist=False):
        """Store the template configuration.

        Args:
            T: number of time steps quoted in the prompt.
            K: number of buttons; the first ``K`` colors are used as labels.
            suggestive: if true, the prompt opens with "You are a bandit
                algorithm ..." instead of the neutral phrasing.
            summarized: if true, the history is rendered as per-button
                summaries rather than a round-by-round list.
            cot: must be false; this template has no chain-of-thought mode.
            dist: accepted but not used; ``self.dist`` is always ``False``.

        Raises:
            Exception: if ``cot`` is true.
        """
        self.T = T
        self.K = K
        self.suggestive = suggestive
        self.summarized = summarized
        self.cot = cot
        if cot:
            raise Exception('oldbuttons template does not support CoT')
        # The ``dist`` argument is ignored; this attribute is hard-wired off.
        self.dist = False
        self.colors = ['blue', 'green', 'red', 'yellow', 'purple', 'brown', 'white']

    def get_name(self):
        """Return an identifier string encoding the template's configuration."""
        parts = [
            "oldbuttons",
            "sug" if self.suggestive else "neu",
            "sum" if self.summarized else "raw",
            "cot" if self.cot else "not",
            f"K={self.K}",
        ]
        return "_".join(parts)

    def get_outputs(self):
        """Return the button labels in use: the first ``K`` colors."""
        return self.colors[:self.K]

    def get_reward_scale(self):
        """Return the reward scale for this template, which is always 1."""
        return 1

    def get_system_text(self):
        """Return the system text for this template; there is none."""
        return None

    def get_main_prompt(self, hist):
        """Build the full prompt text for the given play history.

        ``hist`` is passed to ``summarized_history`` when ``self.summarized``
        is true and to ``unsummarized_history`` otherwise.
        """
        labels = self.colors[0:self.K]
        if self.suggestive:
            opening = f"You are a bandit algorithm in a room with {self.K} buttons labeled {labels}.\n"
        else:
            opening = f"You are in a room with {self.K} buttons labeled {labels}.\n"

        # NOTE: the first line intentionally keeps the space before its
        # newline so the emitted prompt is unchanged character-for-character.
        rules = (
            "Each button is associated with a Bernoulli distribution with a fixed but unknown mean; the means for the buttons could be different. \n"
            "For each button, when you press it, you will get a reward that is sampled from the button's associated distribution.\n"
            f"You have {self.T} time steps and, on each time step, you can choose any button and receive the reward.\n"
            f"Your goal is to maximize the total reward over the {self.T} time steps.\n"
        )

        if self.summarized:
            history = summarized_history(hist, self.colors, self.K)
        else:
            history = unsummarized_history(hist)

        question = f"\nWhich button will you choose next? PLEASE RESPOND ONLY WITH ONE OF {labels} AND NO TEXT EXPLANATION."
        return opening + rules + history + question

    def parse_output(self, pred):
        """Return the model's response unchanged."""
        return pred

    def parse_cot_output(self, pred):
        """Always raise: this template has no chain-of-thought mode."""
        raise Exception('oldbuttons template does not support CoT')
