import argparse


def str2bool(v):
    """Convert a command-line token into a boolean.

    Meant to be passed as the ``type=`` callable of an ``argparse`` argument
    so that flags can be written as ``--flag true`` or ``--flag no``.

    Args:
        v: Either a ``bool`` (returned unchanged) or a string. String
            matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        ``True`` for 'yes', 'true', 't', 'y', '1' and ``False`` for
        'no', 'false', 'f', 'n', '0'.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a bool nor one of
            the recognised strings.
    """
    if isinstance(v, bool):
        return v
    if not isinstance(v, str):
        # Calling .lower() on None / int would raise AttributeError, which
        # argparse does not turn into a usage error; fail the documented way.
        raise argparse.ArgumentTypeError('Boolean value expected.')

    token = v.strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


def parse_args(argv: 'list[str] | None' = None) -> argparse.Namespace:
    """Build the command-line parser and parse the given arguments.

    Boolean options (``--do-quant``, ``--save-model``, ...) take an explicit
    value such as ``true``/``false`` because they are converted with
    ``str2bool``; only ``--wandb`` is a bare on/off switch.

    Args:
        argv: Argument strings to parse. When ``None`` (the default) the
            arguments are taken from ``sys.argv[1:]``, which is the behaviour
            of calling ``parse_args()`` with no arguments.

    Returns:
        The populated ``argparse.Namespace``; dashes in option names become
        underscores (e.g. ``--model-dir`` -> ``args.model_dir``).
    """
    parser = argparse.ArgumentParser(description='gptq-dynamic args')

    # --- Model -------------------------------------------------------------
    parser.add_argument(
        '--model-dir', type=str,
        help='Model to load; pass location of Hugging Face converted checkpoint.'
    )

    # --- Quantization settings ---------------------------------------------
    parser.add_argument(
        '--quant-group-size', type=int, default=128,
        help='Quantization group size.'
    )
    # Bit width is parsed as a float, so fractional values are accepted.
    parser.add_argument(
        '--quant-bit-width', type=float, default=4.,
        help='Quantization bit width.'
    )
    parser.add_argument(
        '--quant-order', type=str,
        choices=['none', 'right2left', 'random', 'act', 'min_pivot'],
        default='act',
        help='Quantization order.'
    )
    parser.add_argument(
        '--quant-use-entropy-mode', type=str,
        choices=['none', 'grouped_e', 'grouped_h', 'all_e', 'all_h', 'strict_e', 'strict_h'],
        default='none',
        help='Quantization using entropy mode.'
    )
    parser.add_argument(
        '--quant-do-clip', type=str2bool, default=True,
        help='Quantization do clipping.'
    )
    parser.add_argument(
        '--quant-use-mse', type=str2bool, default=True,
        help='Quantization using mse scale.'
    )
    parser.add_argument(
        '--seqlen', type=int, default=2048,
        help='Sequence length for quantization.'
    )

    # --- Calibration data --------------------------------------------------
    parser.add_argument(
        '--data-train-set', type=str, choices=['wikitext2', 'ptb', 'c4'], default='c4',
        help='Where to extract calibration data from.'
    )
    parser.add_argument(
        '--data-train-n-samples', type=int, default=128,
        help='Number of calibration data samples.'
    )
    parser.add_argument(
        '--data-seed', type=int, default=42,
        help='Seed for sampling the calibration data.'
    )
    parser.add_argument(
        '--data-cache-dir', type=str, default='./cache/datasets',
        help='Directory to cache datasets.'
    )

    # --- Run control -------------------------------------------------------
    parser.add_argument(
        '--do-quant', type=str2bool, default=True,
        help='Whether to quantize the model or do evaluation only.'
    )
    parser.add_argument(
        '--save-model', type=str2bool, default=True,
        help='Save the quantized model or not.'
    )
    parser.add_argument(
        '--batch-size', type=int, default=16,
        help='Batch size for quantization.'
    )
    parser.add_argument(
        '--save-gpu-mem-level', type=int, choices=[1, 2, 3, 4], default=2,
        help='Level of GPU memory saving. Higher level means longer runtime.'
    )
    parser.add_argument(
        '--do-rtn', type=str2bool, default=False,
        help='Whether to use RTN (round-to-nearest) for quantization.'
    )
    parser.add_argument(
        '--outlier-percentage', type=float, default=None,
        help='Outlier percentage in case of using SSQR.'
    )
    parser.add_argument(
        '--output-base', type=str, default='./outputs',
        help='Base directory for saving output models.'
    )

    # --- Evaluation / logging ----------------------------------------------
    parser.add_argument(
        '--eval-openllm', type=str2bool, default=False,
        help='Whether to evaluate the model using OpenLLM.'
    )
    # Kept as a string so that the non-numeric value 'auto' is accepted.
    parser.add_argument(
        '--lm-eval-batch-size', type=str, default='auto',
        help='Batch size for lm-eval.'
    )
    parser.add_argument(
        "--lm-eval-tasks",
        nargs="+",
        type=str,
        default=["mmlu_cot_llama", "arc_challenge_llama", "gsm8k_llama", "hellaswag", "winogrande", "truthfulqa"],
        help="OpenLLMv1 tasks to evaluate after quantization."
    )
    parser.add_argument(
        '--wandb', action="store_true",
        help='Log in wandb'
    )

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)
