from rl_algos.single_agent.Revalued.action_agent import Agent as ActionAgent

from utils.misc import get_dataset, wandb_init
from utils import env_wrappers

def revalued(config_dict):
    """Set up the Revalued action agent and start training.

    The passed-in ``config_dict`` is modified in place: fixed
    hyper-parameters (learning rates, ``tau``, replay size, algorithm name,
    ``is_continuous``) are written into it before it is handed to the
    logger, the agent constructor and the training method.

    Args:
        config_dict: Run configuration. Must contain the keys ``'env'``,
            ``'use_wandb'`` and ``'offline'``. The environment stored under
            ``'env'`` must expose ``observation_space.shape`` and
            ``action_space.shape``.

    Returns:
        None. Training is started through ``agent.train_offline`` or
        ``agent.train_online``.
    """
    environment = config_dict['env']

    # Hard-coded settings for this algorithm; they overwrite any values the
    # caller may already have stored under the same keys.
    config_dict.update({
        'is_continuous': False,
        'critic_lr': 1e-4,
        'actor_lr': 1e-4,
        'tau': 5e-3,
        'mem_size': 500000,
        'algo_name': 'revalued_action',
    })

    # Logging is initialised only after the config holds its final values.
    if config_dict['use_wandb']:
        wandb_init(config_dict)

    # Only the first axis of each space's shape is used as its size.
    # NOTE(review): shape[0] needs a non-empty action-space shape; confirm
    # this holds for the environments used with is_continuous=False.
    n_obs = environment.observation_space.shape[0]
    n_actions = environment.action_space.shape[0]

    # NOTE(review): no dataset is loaded here, so the agent always receives
    # dataset=None, including when 'offline' is set - confirm this is intended.
    agent = ActionAgent(
        obs_dims=n_obs,
        action_dims=n_actions,
        dataset=None,
        **config_dict,
    )

    # The 'offline' flag picks the training entry point.
    run_training = (
        agent.train_offline if config_dict['offline'] else agent.train_online
    )
    run_training(config_dict)


