import yaml
import numpy as np
from Baird import Baird


# from mdp import MDP
# from mdp_config import get_mdp
# from graph_utils import generate_mixing_matrix,get_graph
# from utils import sovleBellman_equation,calc_bellman_error
# import copy
# from agent import get_agent



def main(config):
    """Roll out a uniformly random policy on the Baird environment.

    Args:
        config: Mapping that provides the keys ``"num_iter"`` (number of
            environment steps to take), ``"alpha"`` and ``"gamma"``.

    Returns:
        None. No learning update is performed here; the loop only steps
        the environment with random actions.

    Raises:
        KeyError: If ``config`` lacks one of the keys listed above.
    """
    num_iter = config["num_iter"]
    # alpha and gamma are not consumed yet; they are still read so that a
    # config missing either key fails immediately with a KeyError.
    alpha = config["alpha"]
    gamma = config["gamma"]

    # NOTE(review): the meaning of the three positional arguments is defined
    # by the Baird class -- confirm there before changing them.
    env = Baird(False, False, 0)

    num_states = env.NUM_STATES  # not used yet
    num_actions = env.NUM_ACTIONS

    state = env.reset()
    for step in range(num_iter):
        # np.random.choice needs the population as its first argument;
        # calling it with no arguments raises TypeError.
        # Assumes env.NUM_ACTIONS is an integer action count, so the draw
        # is uniform over range(num_actions) -- TODO confirm.
        action = np.random.choice(num_actions)
        next_state, r = env.step(action)

        # Carry the successor state into the next iteration.
        state = next_state


    # agent = get_agent(agent_type)(config,config_mdp,W)

    # Qs = [np.random.uniform(-1,1,(num_states,num_actions)) for i in range(N)]

    # state,action = env.reset()

    # Qstar = sovleBellman_equation(env.D,np.mean(env.R,axis=0),env.P,env.num_states,env.num_actions,env.gamma)
    # Qstar = Qstar.reshape(num_states,num_actions)

    # error_hist = []
    # for step in range(num_iter):

    #     action =  np.random.choice(config_mdp["num_actions"])
    #     next_state,r = env.step(action)

    #     agent.update(state,action,next_state,r)
    #     error = agent.error(Qstar)

    #     error_hist.append(error)
    #     state = next_state
    #     if step%1000==0:
    #         print('step:',step)
    # return error_hist


    
if __name__ == '__main__':
    # Script entry point: parse the experiment settings from config.yaml in
    # the current working directory and run a single experiment with them.
    with open('config.yaml') as config_file:
        # full_load is PyYAML's shorthand for load(..., Loader=FullLoader).
        config = yaml.full_load(config_file)

    main(config)

    # results = []
    # for i in range(config["num_runs"]):

    #     print('i-th run:',i)
        
    #     error = main(config)
    #     results.append(error)
    # np.save(f'result/{config["exp_num"]}_N-{config["N"]}_graph-{config["graph_type"]}-agent_{config["agent"]}.npy',results)


