import os
import argparse
import gymnasium as gym
from agent import AR_A2C_Agent
from utils import set_seed

def build_parser():
    """Create the argument parser for this training script.

    Every option is optional and falls back to the default listed below.
    Options are registered in a fixed order, which is the order they appear
    in the generated ``--help`` output.

    Returns:
        argparse.ArgumentParser: parser exposing the training options.
    """
    cli = argparse.ArgumentParser()

    # (flag, value type, default) for every option that carries no help text.
    plain_options = (
        ("--num_frames", int, 3_000_000),
        ("--seed", int, 797),
        ("--actor_lr", float, 1e-5),
        ("--critic_lr", float, 7e-5),
        ("--rho_lr", float, 1e-4),
        ("--rho_clip", float, 5.0),
        ("--hidden", int, 256),
        ("--blocks", int, 2),
        ("--heatmap_every", int, 50_000),
        ("--wandb_project", str, "discounted_a2c"),
        ("--run_name", str, None),
        ("--log_every", int, 100),
    )
    for flag, value_type, fallback in plain_options:
        cli.add_argument(flag, type=value_type, default=fallback)

    # The only option with a help string; registered last, as before.
    cli.add_argument(
        "--anchor_every",
        type=int,
        default=1,
        help="anchor frequency in steps (0=disable, 1=every step)",
    )
    return cli

def main():
    """Run one training session followed by a test pass.

    Steps, in order:
      1. Parse the command-line options from ``build_parser``.
      2. Seed via ``set_seed(args.seed)``.
      3. Create a ``Pendulum-v1`` environment with ``render_mode=None``.
      4. Build an ``AR_A2C_Agent`` from the parsed options.
      5. Call ``agent.train`` with ``num_frames`` and ``heatmap_every``.
      6. Create the video folder and call ``agent.test`` with it.

    The environment is closed on the way out, including when agent
    construction, training or testing raises.
    """
    args = build_parser().parse_args()

    set_seed(args.seed)
    env = gym.make("Pendulum-v1", render_mode=None)

    # Everything that uses the environment runs under try/finally so the
    # environment is released even if a later step fails.
    try:
        agent = AR_A2C_Agent(
            env,
            seed=args.seed,
            actor_lr=args.actor_lr,
            critic_lr=args.critic_lr,
            rho_lr=args.rho_lr,
            rho_clip=args.rho_clip,
            hidden=args.hidden,
            blocks=args.blocks,
            wandb_project=args.wandb_project,
            run_name=args.run_name,
            log_every=args.log_every,
            anchor_every=args.anchor_every,
        )

        agent.train(num_frames=args.num_frames, heatmap_every=args.heatmap_every)

        # Single source for the output path so the created directory and the
        # one handed to agent.test cannot drift apart.
        video_folder = "videos/a2c_avg_reward_test"
        os.makedirs(video_folder, exist_ok=True)
        # NOTE(review): the env is created with render_mode=None; if
        # agent.test records video from this same env it may need
        # render_mode="rgb_array" — confirm against agent.test.
        agent.test(video_folder=video_folder)
    finally:
        # Gymnasium documents close() as having no effect on an already
        # closed environment, so this is safe even if the agent closed it.
        env.close()

# Run the training entry point only when this file is executed as a script,
# so that importing the module does not start a training run.
if __name__ == "__main__":
    main()
