import argparse


def parse_args(argv=None) -> argparse.Namespace:
    """Build the experiment command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behaviour of the original zero-argument call. Passing an
            explicit list makes the function usable from tests and notebooks.

    Returns:
        The populated ``argparse.Namespace``. Options keep their declared
        destination names (e.g. ``--nu-init`` is stored as ``nu_init``).

    Raises:
        SystemExit: Raised by argparse on unknown options or values that
            fail type conversion.
    """
    # fmt: off
    parser = argparse.ArgumentParser()

    parser.add_argument("--env", type=str, default="lending",
                        help="Environment to use for training.")
    parser.add_argument("--max_episode_steps", type=int, default=10000,
                        help="Maximum length of each episode.")
    parser.add_argument("--population_size", type=int, default=1000,
                        help="Size of the population in the environment.")

    # Lending environment parameters
    parser.add_argument("--success_func", type=str, default="credit",
                        help="Function to determine the success of the loan.")
    parser.add_argument("--cons_mean", type=float, default=0.55,
                        help="Mean of the conscientiousness distribution.")

    # College admission parameters
    parser.add_argument("--college_eps", type=float, default=0.5,
                        help="Epsilon for the college admission environment.")

    # Agent parameters
    parser.add_argument("--agent", type=str, default="ppo",
                        help="Agent to use for training.")

    # Non-RL agent parameters
    parser.add_argument("--burnin", type=int, default=50,
                        help="Burnin period for the ML fairness agent.")

    # PPO parameters
    parser.add_argument("--total_timesteps", type=int, default=400000,
                        help="Total timesteps of the experiments")
    parser.add_argument("--hidden_width", type=int, default=256,
                        help="Width of the hidden layers")
    parser.add_argument("--learning_rate", type=float, default=5e-5,
                        help="Learning rate of the optimizer")
    parser.add_argument("--final_learning_rate", type=float, default=2e-5,
                        help="Final learning rate of the optimizer")
    parser.add_argument("--anneal_lr", action="store_true", default=False,
                        help="Toggle learning rate annealing for policy and value networks")
    parser.add_argument("--gamma", type=float, default=0.99,
                        help="Discount factor gamma")
    parser.add_argument("--gae_lambda", type=float, default=0.95,
                        help="Lambda for the general advantage estimation")
    parser.add_argument("--batch_size", type=int, default=512,
                        help="Batch size of sample from the replay buffer")
    parser.add_argument("--mini_batch_size", type=int, default=64,
                        help="Batch size for one gradient update")
    parser.add_argument("--update_epochs", type=int, default=5,
                        help="K epochs to update the policy")
    parser.add_argument("--norm_adv", action="store_true", default=False,
                        help="Toggles advantages normalization")
    parser.add_argument("--clip_coef", type=float, default=0.2,
                        help="Surrogate clipping coefficient")
    parser.add_argument("--clip_vloss", action="store_true", default=False,
                        help="Toggles whether or not to use a clipped loss for the value function, as per the paper.")
    parser.add_argument("--ent_coef", type=float, default=0.01,
                        help="Coefficient of the entropy")
    parser.add_argument("--vf_coef", type=float, default=0.5,
                        help="coefficient of the value function")
    parser.add_argument("--max_grad_norm", type=float, default=0.5,
                        help="Maximum norm for the gradient clipping")
    parser.add_argument("--target_kl", type=float, default=None,
                        help="Target KL divergence threshold")

    # DQN parameters
    parser.add_argument("--tau", type=float, default=1.0,
                        help="the target network update rate")
    parser.add_argument("--exploration_fraction", type=float, default=0.1,
                        help="the fraction of `total-timesteps` it takes from start-e to go end-e")
    # argparse does not run non-string defaults through `type`, so float
    # options are given float literals to keep the attribute type the same
    # whether or not the flag is supplied on the command line.
    parser.add_argument("--start_e", type=float, default=1.0,
                        help="the starting epsilon for exploration")
    parser.add_argument("--end_e", type=float, default=0.05,
                        help="the ending epsilon for exploration")
    parser.add_argument("--dqn_update_epochs", type=int, default=4,
                        help="Number of epochs to update the DQN")
    parser.add_argument("--target_network_update_freq", type=int, default=10,
                        help="Frequency to update the target network")

    # Bisimulator parameters
    parser.add_argument("--rew_coef", type=float, default=5.0,
                        help="Coefficient for the reward loss.")
    parser.add_argument("--decay_rew_coef", action="store_true", default=False,
                        help="Decay the reward coefficient.")
    parser.add_argument("--dyn_model_epochs", type=int, default=100,
                        help="K epochs to update the dynamics model of the bisimulator")
    parser.add_argument("--dyn_rollout_steps", type=int, default=5000,
                        help="Number of steps to rollout the dynamics model for the bisimulator")
    parser.add_argument("--start_dyn_opt", type=int, default=20000,
                        help="Start the dynamics optimization after this number of global steps")
    parser.add_argument("--dyn_opt_iters", type=int, default=300,
                        help="Iterations of the gradient-free optimization for the MDP dynamics.")
    parser.add_argument("--rew_steps", type=int, default=1,
                        help="Number of steps to optimize the reward model.")
    parser.add_argument("--rew_update_freq", type=int, default=1,
                        help="Frequency to update the reward model.")
    parser.add_argument("--anneal_bisim_lr", action="store_true", default=False,
                        help="Anneal the learning rate for the bisimulator.")

    # Lagrangian PPO parameters
    # The dashed spellings are the original flags and stay first so the
    # destinations remain nu_init / nu_max / nu_lr; the underscore aliases
    # match the naming style of every other option in this parser.
    parser.add_argument("--nu-init", "--nu_init", type=float, default=0.0,
                        help="the initial nu parameter")
    parser.add_argument("--nu-max", "--nu_max", type=float, default=2.0,
                        help="the maximum range for nu parameter")
    parser.add_argument("--nu-lr", "--nu_lr", type=float, default=0.01,
                        help="the learning rate for nu")
    parser.add_argument("--epsilon", type=float, default=10.0,
                        help="the epsilon parameter")

    # A-PPO parameters
    parser.add_argument("--omega", type=float, default=0.005,
                        help="Threshold for delta")
    parser.add_argument("--beta_0", type=float, default=1.0,
                        help="Weight for advantage")
    parser.add_argument("--beta_1", type=float, default=0.25,
                        help="Weight for value thresholding")
    parser.add_argument("--beta_2", type=float, default=0.25,
                        help="Weight for decrease in violation")

    # ELBERT parameters
    parser.add_argument("--bias_coef", type=float, default=300.0,
                        help="Bias coefficient for the ELBERT agent.")
    parser.add_argument("--beta_smooth", type=float, default=20.0,
                        help="Beta smoothing parameter for the ELBERT agent.")

    # Run parameters
    parser.add_argument("--seed", type=int, default=1,
                        help="Random seed.")
    parser.add_argument("--device", type=str, default="cuda",
                        help="Device to use for training.")
    parser.add_argument("--eval_every", type=int, default=25,
                        help="Evaluation frequency.")
    parser.add_argument("--plot_every", type=int, default=400,
                        help="Plotting frequency.")
    parser.add_argument("--save_every", type=int, default=400,
                        help="Save frequency.")
    parser.add_argument("--eval_count", type=int, default=5,
                        help="Number of episodes to evaluate.")

    # argv=None makes argparse read sys.argv[1:], as the original call did.
    args = parser.parse_args(argv)
    # fmt: on
    return args
