"""
This module implements the Q-learning "brain" of the agent.
All of the agent's decisions are made here.

View more on my tutorial page: https://morvanzhou.github.io/tutorials/
Independent Q learning
"""
from collections import defaultdict

import numpy as np
import pandas as pd


class QLearningTable:
    """Tabular Q-learning agent backed by pandas DataFrames.

    ``q_table`` holds one row per known state and one column per action,
    storing the current Q(s, a) estimates. ``action_table`` has the same
    layout and counts how often ``learn`` has updated each (state, action)
    pair. Rows are added lazily the first time a state is seen.

    Note: ``epsilon`` is the probability of acting *greedily*; a uniformly
    random action is chosen with probability ``1 - epsilon``.
    """

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.2):
        """Create an agent with empty tables.

        Args:
            actions: List of action labels; they become the table columns.
            learning_rate: Step size applied to each temporal-difference update.
            reward_decay: Discount factor (gamma) for the next state's value.
            e_greedy: Initial probability of picking the greedy action.
        """
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
        self.action_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def update_epsilon(self, increment=0.1):
        """Raise the greedy-action probability by ``increment``, capped at 1.0.

        The cap keeps ``epsilon`` a valid probability no matter how many
        times this is called; action selection is unaffected by the cap
        because ``np.random.uniform()`` never returns a value >= 1.0.
        """
        self.epsilon = min(1.0, self.epsilon + increment)

    def choose_action(self, observation):
        """Return an action for ``observation``.

        With probability ``epsilon`` the action with the highest Q-value is
        returned (ties are broken uniformly at random); otherwise a uniformly
        random action from ``self.actions`` is returned. The state is added
        to the tables first if it has not been seen before.
        """
        self.check_state_exist(observation)
        if np.random.uniform() < self.epsilon:
            # Exploit: pick among the best-valued actions for this state.
            state_action = self.q_table.loc[observation, :]
            best_actions = state_action[state_action == state_action.max()].index
            # Several actions may share the maximum; choose one at random so
            # the agent is not biased towards the first column.
            return np.random.choice(best_actions)
        # Explore: ignore the Q-values entirely.
        return np.random.choice(self.actions)

    def learn(self, s, a, r, s_):
        """Apply one Q-learning update for the transition (s, a, r, s_).

        Args:
            s: State in which the action was taken.
            a: Action taken (must be one of the table columns).
            r: Reward received.
            s_: Resulting state; the string ``'terminal'`` marks episode end,
                in which case no future value is bootstrapped.
        """
        # Register both states so that learning from a transition whose
        # source state never went through choose_action does not KeyError.
        self.check_state_exist(s)
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            # Bootstrap from the best action value of the next state.
            q_target = r + self.gamma * self.q_table.loc[s_, :].max()
        else:
            q_target = r
        self.q_table.loc[s, a] = q_predict + self.lr * (q_target - q_predict)
        self.action_table.loc[s, a] = self.action_table.loc[s, a] + 1

    def check_state_exist(self, state):
        """Ensure ``state`` has a zero-initialised row in both tables."""
        self.q_table = self._with_state_row(self.q_table, state)
        self.action_table = self._with_state_row(self.action_table, state)

    @staticmethod
    def _with_state_row(table, state):
        """Return ``table`` with an all-zero float row for ``state`` present."""
        if state in table.index:
            return table
        # Build the row as float64 so the table keeps its declared dtype;
        # integer zeros would turn the columns into int64 and make later
        # float writes an incompatible-dtype assignment.
        row = pd.DataFrame(
            np.zeros((1, len(table.columns)), dtype=np.float64),
            columns=table.columns,
            index=[state],
        )
        if len(table.index) == 0:
            # Nothing to append to yet; this also avoids concatenating with
            # an empty frame, whose dtype handling differs across pandas
            # versions.
            return row
        return pd.concat([table, row])

