
import gymnasium as gym
from stable_baselines3 import DQN, PPO, A2C
import os

def train_rl_agents(
    total_timesteps: int = 1000,
    *,
    save_dir: str = "results/models",
    env_id: str = "MountainCar-v0",
):
    """Train DQN, PPO and A2C agents on one environment and save them.

    A single environment instance is created and passed to all three
    algorithms, which are trained one after another with the same
    timestep budget and the ``"MlpPolicy"`` policy. After training, each
    model is written under ``save_dir`` as ``dqn_agent``, ``ppo_agent``
    and ``a2c_agent``.

    Args:
        total_timesteps: Timestep budget passed to each agent's
            ``learn`` call.
        save_dir: Directory the trained models are saved into. It is
            created (including parents) if it does not exist.
        env_id: Gymnasium environment id passed to ``gym.make``.

    Returns:
        A ``(dqn, ppo, a2c)`` tuple of the trained models. The
        environment they were built with has already been closed when
        this function returns.
    """
    env = gym.make(env_id)
    # try/finally so the environment is released even when training or
    # saving raises part-way through.
    try:
        dqn = DQN("MlpPolicy", env, verbose=0)
        dqn.learn(total_timesteps=total_timesteps)

        ppo = PPO("MlpPolicy", env, verbose=0)
        ppo.learn(total_timesteps=total_timesteps)

        a2c = A2C("MlpPolicy", env, verbose=0)
        a2c.learn(total_timesteps=total_timesteps)

        os.makedirs(save_dir, exist_ok=True)
        dqn.save(os.path.join(save_dir, "dqn_agent"))
        ppo.save(os.path.join(save_dir, "ppo_agent"))
        a2c.save(os.path.join(save_dir, "a2c_agent"))
    finally:
        env.close()

    return dqn, ppo, a2c
