from dataclasses import dataclass, field
from typing import Optional, List
from transformers import HfArgumentParser

import dp_transformers

@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.
    """

    # Required: the field has no default, so a value must always be supplied.
    train_dataset_name: str = field(
        metadata={"help": "Name of the training dataset"}
    )

    # Optional: defaults to None, hence Optional[str] rather than plain str.
    eval_dataset_name: Optional[str] = field(
        default=None,
        metadata={"help": "Name of the evaluation dataset"}
    )

    number_of_canaries: int = field(
        default=10_000,
        metadata={"help": "Length of the canary dataset"}
    )

    cardinality: int = field(
        default=127,
        metadata={"help": "Cardinality (Number of generated canaries to use)"}
    )

    # default_factory gives every instance its own list instead of sharing
    # a single mutable default across instances.
    canary_types: List[str] = field(
        default_factory=lambda: ['sha256'],
        metadata={"help": "List of canaries to use"}
    )

    canary_dataset_name: str = field(
        default='persona_canaries',
        metadata={"help": "Name of the canaries dataset"}
    )

    max_seq_length: int = field(
        default=64,
        metadata={
            "help": "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded."
        },
    )

    only_canaries: bool = field(
        default=False,
        metadata={"help": "Whether to use only canaries for training"}
    )

    original_version: bool = field(
        default=False,
        metadata={"help": "Whether to use the original version of the auditing procedure (following Steinke et al.)"}
    )

    # Note: this flag is enabled by default, unlike the other boolean options.
    black_box_audit: bool = field(
        default=True,
        metadata={"help": "Whether to use black-box auditing or not"}
    )
@dataclass
class ModelArguments:
    """
    Options selecting the model and tokenizer to fine-tune from.

    Only `model_name_or_path` is required; every other field has a default.
    """

    # Required: no default value is provided for this field.
    model_name_or_path: str = field(
        metadata={
            "help": "Path to pretrained Prefix model or model identifier from huggingface.co/models"
        },
    )

    tokenizer_name: Optional[str] = field(
        default=None,
        metadata={
            "help": "Pretrained tokenizer name or path if not the same as model_name"
        },
    )

    use_fast_tokenizer: bool = field(
        default=True,
        metadata={
            "help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."
        },
    )

    constant_scheduler: bool = field(
        default=False,
        metadata={
            "help": "Whether to use a constant scheduler or not"
        },
    )
    
 

def get_args(arg_list=None):
    """Parse data, model, training and privacy arguments into dataclasses.

    Args:
        arg_list: Optional sequence of argument strings to parse. Any falsy
            value (``None`` or an empty list) makes the parser use its
            default argument source instead.

    Returns:
        Whatever ``HfArgumentParser.parse_args_into_dataclasses`` returns for
        the dataclass types registered below, in this order:
        ``DataTrainingArguments``, ``ModelArguments``,
        ``dp_transformers.TrainingArguments``,
        ``dp_transformers.PrivacyArguments``.
    """
    dataclass_types = (
        DataTrainingArguments,
        ModelArguments,
        dp_transformers.TrainingArguments,
        dp_transformers.PrivacyArguments,
    )
    parser = HfArgumentParser(dataclass_types)

    # Guard clause: with nothing explicit to parse, defer to the parser's default.
    if not arg_list:
        return parser.parse_args_into_dataclasses()

    return parser.parse_args_into_dataclasses(arg_list)

def print_args(model_args, data_args, training_args, privacy_args, path_to_file: str):
    """Write all argument groups to a text file, one ``key : value`` per line.

    The file at ``path_to_file`` is created or truncated. It receives a
    ``==========`` rule followed by four sections (MODEL, DATA, TRAINING,
    PRIVACY); each section lists every entry of the corresponding object's
    ``__dict__`` on its own indented line.

    Args:
        model_args: Object whose instance attributes form the MODEL section.
        data_args: Object whose instance attributes form the DATA section.
        training_args: Object whose instance attributes form the TRAINING section.
        privacy_args: Object whose instance attributes form the PRIVACY section.
        path_to_file: Destination path of the report file.
    """
    # Header strings are kept exactly as before; only the entry lines changed.
    sections = (
        ('---MODEL ARGS---\n', model_args),
        ('\n---DATA ARGS---\n', data_args),
        ('\n---TRAINING ARGS---\n', training_args),
        ('\n---PRIVACY ARGS---\n', privacy_args),
    )

    # Plain write mode is enough: the file is never read back here.
    with open(path_to_file, mode='w') as f:
        f.write('=' * 10)
        for header, args in sections:
            f.write(header)
            # Each entry ends with a newline so entries no longer run
            # together on a single line.
            f.writelines(
                '        - {} : {}\n'.format(k, v) for k, v in vars(args).items()
            )
