"""
        configuration:
                Get data and parameters from the user via command-line arguments
"""
import argparse
from pathlib import Path
from transformers import SchedulerType

# Module-level dataset path placeholders; both start out as empty strings.
# NOTE(review): neither name is referenced in the visible part of this
# module -- presumably they are read or overridden elsewhere; confirm.
TRAIN_DATASET_PATH = ""
EVAL_DATASET_PATH = ""

class SQUADConfig:
    """
            SQuAD Question Answering PROJECT:
                    Command-line configuration

    Builds an ``argparse.ArgumentParser`` that declares every option the
    project accepts (dataset files, tokenization, model selection,
    optimisation, answer post-processing, checkpointing and Hub upload).
    Call :meth:`get_args` to parse them into a namespace.

    Attributes:
        parser: The fully configured ``argparse.ArgumentParser``.
    """

    # Accepted spellings (compared case-insensitively) for boolean-valued options.
    _TRUE_STRINGS = frozenset({"true", "t", "yes", "y", "1", "on"})
    # The empty string stays "false" because the previous ``type=bool``
    # converter also mapped it to ``False``.
    _FALSE_STRINGS = frozenset({"false", "f", "no", "n", "0", "off", ""})

    @staticmethod
    def _str_to_bool(value):
        """Convert a command-line string into a real boolean.

        ``type=bool`` must not be used with argparse: ``bool("False")`` is
        ``True`` because every non-empty string is truthy.

        Args:
            value: The raw option value (a ``bool`` is returned unchanged).

        Returns:
            ``True`` or ``False``.

        Raises:
            argparse.ArgumentTypeError: If ``value`` is not a recognised
                boolean spelling; argparse reports this as a usage error.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in SQUADConfig._TRUE_STRINGS:
            return True
        if text in SQUADConfig._FALSE_STRINGS:
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")

    def __init__(self):
        """Create the parser and register every supported command-line option."""
        self.parser = argparse.ArgumentParser()

        self.parser.add_argument(
            "--dataset_name",
            type=str,
            default=None,
            help="The name of the dataset to use (via the datasets library).",
        )
        self.parser.add_argument(
            "--dataset_config_name",
            type=str,
            default=None,
            help="The configuration name of the dataset to use (via the datasets library).",
        )
        self.parser.add_argument(
            "--train_file", type=str, default=None, help="A csv or a json file containing the training data."
        )
        # NOTE(review): the original help text was copied from --train_file;
        # the wording below is derived from the option name -- confirm.
        self.parser.add_argument(
            "--preprocessing_num_workers",
            type=int,
            default=1,
            help="Number of workers to use for preprocessing.",
        )
        self.parser.add_argument(
            "--do_predict", action="store_true", help="To do prediction on the question answering model"
        )
        self.parser.add_argument(
            "--validation_file", type=str, default=None, help="A csv or a json file containing the validation data."
        )
        self.parser.add_argument(
            "--test_file", type=str, default=None, help="A csv or a json file containing the Prediction data."
        )
        self.parser.add_argument(
            "--max_seq_length",
            type=int,
            default=128,
            help=(
                "The maximum total input sequence length after tokenization. Sequences longer than this will be truncated,"
                " sequences shorter will be padded if `--pad_to_max_length` is passed."
            ),
        )
        self.parser.add_argument(
            "--pad_to_max_length",
            action="store_true",
            help="If passed, pad all samples to `max_seq_length`. Otherwise, dynamic padding is used.",
        )
        # Not marked as required, so the value is None when the option is omitted.
        self.parser.add_argument(
            "--model_name_or_path",
            type=str,
            help="Path to pretrained model or model identifier from huggingface.co/models.",
        )
        self.parser.add_argument(
            "--config_name",
            type=str,
            default=None,
            help="Pretrained config name or path if not the same as model_name",
        )
        self.parser.add_argument(
            "--tokenizer_name",
            type=str,
            default=None,
            help="Pretrained tokenizer name or path if not the same as model_name",
        )
        self.parser.add_argument(
            "--use_slow_tokenizer",
            action="store_true",
            help="If passed, will use a slow tokenizer (not backed by the 🤗 Tokenizers library).",
        )
        self.parser.add_argument(
            "--per_device_train_batch_size",
            type=int,
            default=8,
            help="Batch size (per device) for the training dataloader.",
        )
        self.parser.add_argument(
            "--per_device_eval_batch_size",
            type=int,
            default=8,
            help="Batch size (per device) for the evaluation dataloader.",
        )
        self.parser.add_argument(
            "--learning_rate",
            type=float,
            default=5e-5,
            help="Initial learning rate (after the potential warmup period) to use.",
        )
        self.parser.add_argument("--weight_decay", type=float, default=0.0, help="Weight decay to use.")
        self.parser.add_argument(
            "--num_train_epochs", type=int, default=3, help="Total number of training epochs to perform."
        )
        self.parser.add_argument(
            "--max_train_steps",
            type=int,
            default=None,
            help="Total number of training steps to perform. If provided, overrides num_train_epochs.",
        )
        self.parser.add_argument(
            "--gradient_accumulation_steps",
            type=int,
            default=1,
            help="Number of updates steps to accumulate before performing a backward/update pass.",
        )
        # argparse passes the raw command-line string through SchedulerType
        # before checking the result against `choices`.
        self.parser.add_argument(
            "--lr_scheduler_type",
            type=SchedulerType,
            default="linear",
            help="The scheduler type to use.",
            choices=["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"],
        )
        self.parser.add_argument(
            "--num_warmup_steps", type=int, default=0, help="Number of steps for the warmup in the lr scheduler."
        )
        self.parser.add_argument("--output_dir", type=str, default=None, help="Where to store the final model.")
        self.parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.")
        self.parser.add_argument(
            "--doc_stride",
            type=int,
            default=64,
            help="When splitting up a long document into chunks how much stride to take between chunks.",
        )
        self.parser.add_argument(
            "--n_best_size",
            type=int,
            default=20,
            help="The total number of n-best predictions to generate when looking for an answer.",
        )
        self.parser.add_argument(
            "--null_score_diff_threshold",
            type=float,
            default=0.0,
            help=(
                "The threshold used to select the null answer: if the best answer has a score that is less than "
                "the score of the null answer minus this threshold, the null answer is selected for this example. "
                "Only useful when `version_2_with_negative=True`."
            ),
        )
        self.parser.add_argument(
            "--version_2_with_negative",
            action="store_true",
            help="If true, some of the examples do not have an answer.",
        )
        # NOTE(review): the original help text was copied from --train_file;
        # the wording below is derived from the option name -- confirm.
        self.parser.add_argument(
            "--dataset_write_path", type=str, default=None, help="Path to write the dataset to."
        )
        # NOTE(review): the original help text was copied from --train_file;
        # the wording below is derived from the option name -- confirm.
        self.parser.add_argument("--cache_dir", type=str, default=None, help="Directory to use as cache.")
        self.parser.add_argument(
            "--max_answer_length",
            type=int,
            default=30,
            help=(
                "The maximum length of an answer that can be generated. This is needed because the start "
                "and end predictions are not conditioned on one another."
            ),
        )
        self.parser.add_argument(
            "--max_train_samples",
            type=int,
            default=None,
            help=(
                "For debugging purposes or quicker training, truncate the number of training examples to this "
                "value if set."
            ),
        )
        self.parser.add_argument(
            "--max_eval_samples",
            type=int,
            default=None,
            help=(
                "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
                "value if set."
            ),
        )
        # Boolean options: `type=bool` would turn "False" into True, so an
        # explicit converter is used.  `nargs="?"` with `const=True` also
        # lets the option be given bare (`--overwrite_cache`) while the old
        # `--overwrite_cache True` form keeps working.
        self.parser.add_argument(
            "--overwrite_cache",
            type=self._str_to_bool,
            nargs="?",
            const=True,
            default=False,
            help="Overwrite the cached training and evaluation sets",
        )
        # NOTE(review): the original help text was copied from
        # --overwrite_cache; the wording below is derived from the option
        # name -- confirm.
        self.parser.add_argument(
            "--hyper_param_search",
            type=self._str_to_bool,
            nargs="?",
            const=True,
            default=False,
            help="Enable hyper-parameter search.",
        )
        self.parser.add_argument(
            "--max_predict_samples",
            type=int,
            default=None,
            help=(
                "For debugging purposes or quicker training, truncate the number of prediction examples to this "
                "value if set."
            ),
        )
        self.parser.add_argument(
            "--model_type",
            type=str,
            default=None,
            help="Model type to use if training from scratch.",
        )
        self.parser.add_argument(
            "--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub."
        )
        self.parser.add_argument(
            "--hub_model_id", type=str, help="The name of the repository to keep in sync with the local `output_dir`."
        )
        self.parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.")
        self.parser.add_argument(
            "--checkpointing_steps",
            type=str,
            default=None,
            help="Whether the various states should be saved at the end of every n steps, or 'epoch' for each epoch.",
        )
        self.parser.add_argument(
            "--resume_from_checkpoint",
            type=str,
            default=None,
            help="If the training should continue from a checkpoint folder.",
        )
        self.parser.add_argument(
            "--with_tracking",
            action="store_true",
            help="Whether to load in all available experiment trackers from the environment and use them for logging.",
        )
        # NOTE(review): the original help text was copied from
        # --max_answer_length; what the shard value controls is not visible
        # in this file -- confirm and expand the help text.
        self.parser.add_argument(
            "--shard",
            type=int,
            default=256,
            help="Integer shard setting.",
        )

    def get_args(self, args=None) -> argparse.Namespace:
        """Parse the command line and return the resulting namespace.

        Args:
            args: Optional list of argument strings to parse.  When ``None``
                (the default) argparse reads ``sys.argv[1:]``, which is the
                behaviour callers had before this parameter existed.

        Returns:
            The ``argparse.Namespace`` holding one attribute per option.
        """
        return self.parser.parse_args(args)
