import argparse

import torch


def get_args(argv=None):
    """Parse and validate the command-line options for a training run.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The parsed ``argparse.Namespace`` with one derived attribute added:
        ``cuda`` is true only when ``--no-cuda`` was not given and
        ``torch.cuda.is_available()`` returns true.

    Raises:
        SystemExit: raised by argparse (exit status 2) when an argument is
            malformed, when ``--algo`` is not one of ``a2c``/``ppo``/``acktr``,
            or when ``--recurrent-policy`` is combined with ``acktr``.
    """
    parser = argparse.ArgumentParser(description='RL')

    # RL args
    parser.add_argument(
        '--algo', default='a2c', help='algorithm to use: a2c | ppo | acktr')
    parser.add_argument(
        '--gail',
        action='store_true',
        help='do imitation learning with gail')
    parser.add_argument(
        '--gail-experts-dir',
        default='./gail_experts',
        help='directory that contains expert demonstrations for gail')
    parser.add_argument(
        '--gail-batch-size',
        type=int,
        default=128,
        help='gail batch size (default: 128)')
    parser.add_argument(
        '--gail-epoch', type=int, default=5, help='gail epochs (default: 5)')
    parser.add_argument(
        '--lr', type=float, default=7e-4, help='learning rate (default: 7e-4)')
    parser.add_argument(
        '--eps',
        type=float,
        default=1e-5,
        help='RMSprop optimizer epsilon (default: 1e-5)')
    parser.add_argument(
        '--alpha',
        type=float,
        default=0.99,
        help='RMSprop optimizer alpha (default: 0.99)')
    parser.add_argument(
        '--gamma',
        type=float,
        default=0.99,
        help='discount factor for rewards (default: 0.99)')
    parser.add_argument(
        '--use-gae',
        action='store_true',
        help='use generalized advantage estimation')
    parser.add_argument(
        '--gae-lambda',
        type=float,
        default=0.95,
        help='gae lambda parameter (default: 0.95)')
    parser.add_argument(
        '--entropy-coef',
        type=float,
        default=0.01,
        help='entropy term coefficient (default: 0.01)')
    parser.add_argument(
        '--value-loss-coef',
        type=float,
        default=0.5,
        help='value loss coefficient (default: 0.5)')
    parser.add_argument(
        '--max-grad-norm',
        type=float,
        default=0.5,
        help='max norm of gradients (default: 0.5)')
    parser.add_argument(
        '--seed', type=int, default=1, help='random seed (default: 1)')
    parser.add_argument(
        '--cuda-deterministic',
        action='store_true',
        help="sets flags for determinism when using CUDA (potentially slow!)")
    parser.add_argument(
        '--num-processes',
        type=int,
        default=16,
        help='how many training CPU processes to use (default: 16)')
    parser.add_argument(
        '--num-steps',
        type=int,
        default=5,
        help='number of forward steps in A2C (default: 5)')
    parser.add_argument(
        '--ppo-epoch',
        type=int,
        default=4,
        help='number of ppo epochs (default: 4)')
    parser.add_argument(
        '--num-mini-batch',
        type=int,
        default=32,
        help='number of batches for ppo (default: 32)')
    parser.add_argument(
        '--clip-param',
        type=float,
        default=0.2,
        help='ppo clip parameter (default: 0.2)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help='log interval, one log per n updates (default: 10)')
    parser.add_argument(
        '--save-interval',
        type=int,
        default=100,
        help='save interval, one save per n updates (default: 100)')
    parser.add_argument(
        '--eval-interval',
        type=int,
        default=None,
        help='eval interval, one eval per n updates (default: None)')
    # argparse only runs `type` on string defaults, so the literal 10e6 would
    # leak through as a float; convert it here so the value is always an int.
    parser.add_argument(
        '--num-env-steps',
        type=int,
        default=int(10e6),
        help='number of environment steps to train (default: 10e6)')
    parser.add_argument(
        '--env-name',
        default='PongNoFrameskip-v4',
        help='environment to train on (default: PongNoFrameskip-v4)')
    parser.add_argument(
        '--log-dir',
        default='/tmp/gym/',
        help='directory to save agent logs (default: /tmp/gym)')
    parser.add_argument(
        '--save-dir',
        default='./trained_models/',
        help='directory to save trained models (default: ./trained_models/)')
    parser.add_argument(
        '--no-cuda',
        action='store_true',
        help='disables CUDA training')
    parser.add_argument(
        '--cuda-id',
        type=int,
        default=0,
        help='CUDA device id (default: 0)')
    parser.add_argument(
        '--use-proper-time-limits',
        action='store_true',
        help='compute returns taking into account time limits')
    parser.add_argument(
        '--recurrent-policy',
        action='store_true',
        help='use a recurrent policy')
    parser.add_argument(
        '--use-linear-lr-decay',
        action='store_true',
        help='use a linear schedule on the learning rate')

    # LM args
    parser.add_argument(
        '--models', default='gpt2-xl', help='name of model(s), e.g., GPT2-XL')
    parser.add_argument(
        '--datasets', default='sst2', help='name of dataset(s), e.g., agnews')
    parser.add_argument(
        '--num_shots',
        type=int,
        default=8,
        help='num training examples to use')
    parser.add_argument(
        '--max_steps',
        type=int,
        default=5,
        help='max number of steps to permute')
    parser.add_argument(
        '--api_num_log_prob',
        type=int,
        default=100,
        help='number of top tokens to ask for when querying the model. '
             'Capped at 100 for OpenAI GPT-3 API')
    parser.add_argument(
        '--approx',
        action='store_true',
        help='whether to set token prob to zero if not in top 100')
    parser.add_argument(
        '--normalize_obs',
        action='store_true',
        help='Whether to normalize the obs')
    parser.add_argument(
        '--normalize_rew',
        action='store_true',
        help='Whether to normalize the rew')
    parser.add_argument(
        '--subsample_test_set',
        type=int,
        default=300,
        help='size of test set to use to speed up eval. '
             'None means using all test set')
    parser.add_argument(
        '--verbalizer',
        action='store_true',
        help='Whether to use verbalizer')
    parser.add_argument(
        '--rew_type', default='ce', help='which loss type to use')
    parser.add_argument(
        '--example_pool_size',
        type=int,
        default=15,
        help='num training examples to use to select from')
    parser.add_argument(
        '--use_attention',
        action='store_true',
        help='whether to use attention')
    parser.add_argument(
        '--use_knn',
        action='store_true',
        help='whether to perform KNN search for each query')
    parser.add_argument(
        '--sub_sample',
        action='store_true',
        help='whether to subsample training dataset')
    parser.add_argument('--local_rank', type=int)
    parser.add_argument(
        '--num_actors',
        type=int,
        default=8,
        metavar='N',
        help='Number of actors.')
    parser.add_argument(
        '--env_entropy_coef',
        type=float,
        default=0.0,
        help='exploration bonus coefficient (default: 0.0)')
    parser.add_argument(
        '--random_init',
        type=int,
        default=0,
        metavar='N',
        help='whether to random init dataset')
    parser.add_argument(
        '--load_ckpt',
        action='store_true',
        help='Whether to load ckpt')
    parser.add_argument(
        '--evaluate',
        action='store_true',
        help='evaluate the policy')
    parser.add_argument(
        '--use_adv',
        action='store_true',
        help='whether use adv in es')
    parser.add_argument(
        '--use_value',
        action='store_true',
        help='whether use value network in PPO')
    parser.add_argument(
        '--use_mirror',
        action='store_true',
        help='whether use mirrored sampling in es')
    parser.add_argument(
        '--use_rew_model',
        action='store_true',
        help='whether use gptfuzz reward model to train policy')
    parser.add_argument(
        '--ckpt_path',
        default='/tmp/gym/',
        help='directory to load pretrained policy')
    parser.add_argument(
        '--openai_key',
        type=str,
        default='You must have an OpenAI key',
        help='OpenAI key')

    args = parser.parse_args(argv)

    # Derived flag: CUDA is used only if not disabled and actually available.
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # Validate with parser.error rather than assert: asserts are removed under
    # `python -O`, which would let invalid combinations through unchecked.
    if args.algo not in ('a2c', 'ppo', 'acktr'):
        parser.error(
            '--algo must be one of a2c | ppo | acktr (got %r)' % (args.algo,))
    if args.recurrent_policy and args.algo not in ('a2c', 'ppo'):
        parser.error('Recurrent policy is not implemented for ACKTR')

    return args
