# OFMU Experiment Configuration

# Run-level settings: identity, output location, log level, seed, hardware
experiment:
  name: "OFMU_Comprehensive_Evaluation"
  # Folded scalar: loads as a single line with no trailing newline.
  description: >-
    Complete evaluation of OFMU algorithm on TOFU, WMDP,
    and CIFAR datasets
  output_dir: "./results"
  log_level: "INFO"
  seed: 42

  # Compute resources for the run
  hardware:
    # Accepted values: auto, cuda, cpu
    device: "auto"
    mixed_precision: true
    gradient_checkpointing: true
    num_gpus: 1
    max_memory_per_gpu: "24GB"

# Training configurations
training:
  # Common training settings
  num_epochs: 10
  # Keep the mantissa dot: YAML 1.1 loaders (e.g. PyYAML) only resolve
  # exponent notation as a float when it contains a ".", so a bare 1e-5
  # would load as the string "1e-5".
  learning_rate: 1.0e-5
  warmup_ratio: 0.1
  weight_decay: 0.01
  max_grad_norm: 1.0

  # Batch sizes (adjust based on GPU memory)
  batch_size:
    language_models: 4
    vision_models: 32
    evaluation: 16

  # Gradient accumulation
  gradient_accumulation_steps: 4

  # Logging and saving
  logging_steps: 100
  eval_steps: 500
  save_steps: 1000
  save_total_limit: 3

# OFMU-specific parameters
ofmu:
  # Core OFMU parameters
  beta: 0.1  # Gradient similarity weight
  penalty_coeff: 1.0  # Penalty method coefficient

  # Bi-level optimization
  # Learning rates keep an explicit mantissa dot (1.0e-5, not 1e-5) so that
  # YAML 1.1 loaders such as PyYAML resolve them as floats, not strings.
  inner_optimization:
    steps: 5
    learning_rate: 1.0e-5
    similarity_metric: "cosine"  # cosine, dot_product, euclidean

  outer_optimization:
    steps: 3
    learning_rate: 1.0e-5
    penalty_method: "quadratic"  # quadratic, absolute

  # Gradient similarity settings
  similarity_threshold: 0.7
  gradient_clipping: true

  # Advanced settings
  adaptive_beta: false
  beta_schedule: "constant"  # constant, linear, cosine
  early_stopping: true
  patience: 3

# Baseline methods configuration
# One mapping of hyperparameters per baseline method.
# Learning rates keep an explicit mantissa dot (1.0e-4, not 1e-4) so that
# YAML 1.1 loaders such as PyYAML resolve them as floats, not strings.
baseline_methods:
  gradient_ascent:
    learning_rate: 1.0e-4
    max_steps: 1000

  gradient_difference:
    learning_rate: 1.0e-5
    alpha: 0.1  # Retain loss weight

  npo:
    learning_rate: 1.0e-5
    beta: 0.1

  rmu:
    learning_rate: 1.0e-5
    alpha: 1.0

  finetune:
    learning_rate: 2.0e-5
    num_epochs: 3

# Per-dataset settings: subsets to run, sequence length, metrics, transforms
datasets:
  tofu:
    scenarios:
      - "forget01"
      - "forget05"
      - "forget10"
    max_length: 2048
    evaluation_metrics:
      - "forget_quality"
      - "model_utility"
      - "truth_ratio"
      - "rouge_score"

  wmdp:
    domains:
      - "bio"
      - "cyber"
      - "chem"
    max_length: 2048
    evaluation_metrics:
      - "qa_accuracy"
      - "safety_score"
      - "retain_performance"

  muse:
    categories:
      - "books"
      - "movies"
      - "cities"
    max_length: 2048
    evaluation_metrics:
      - "forget_quality"
      - "neighbor_accuracy"
      - "truthfulness"

  cifar:
    datasets:
      - "CIFAR10"
      - "CIFAR100"
    # Each entry is a number of classes to forget, keyed by dataset name
    forget_classes:
      CIFAR10: [1, 5]
      CIFAR100: [5, 10, 20]
    evaluation_metrics:
      - "unlearn_accuracy"
      - "retain_accuracy"
      - "test_accuracy"
      - "mia_auc"
    # Transform names listed separately for the train and test splits
    transforms:
      train:
        - "RandomCrop"
        - "RandomHorizontalFlip"
        - "ToTensor"
        - "Normalize"
      test:
        - "ToTensor"
        - "Normalize"

# Model configurations
# Primary and secondary model identifiers for each modality.
models:
  language:
    primary: ["llama2-7b"]
    secondary: ["llama3-8b"]

  vision:
    primary: ["resnet18"]
    secondary: ["resnet50"]

# Evaluation settings
# Covers generation parameters, evaluation cadence, statistical analysis,
# and which metric families are computed.
evaluation:
  # Generation settings for language models
  # NOTE(review): key names match Hugging Face `generate()` arguments —
  # confirm that is how the consumer uses them.
  generation:
    max_new_tokens: 256
    temperature: 0.7
    top_p: 0.9
    do_sample: true
    num_return_sequences: 1
  
  # Evaluation frequencies
  eval_during_training: true
  # NOTE(review): training.eval_steps is also 500 — confirm which of the two
  # keys the runner reads and keep them in sync.
  eval_frequency: 500  # steps
  final_evaluation: true
  
  # Statistical analysis
  statistical_tests: true
  confidence_level: 0.95
  num_bootstrap_samples: 1000
  
  # Metrics computation (one on/off switch per metric family)
  compute_perplexity: true
  compute_generation_metrics: true
  compute_mia_metrics: true

# Experiment matrix: one entry per benchmark, each with its own on/off switch
# NOTE(review): method names are presumably resolved against
# `baseline_methods`; "gradient_diff" (vs. the `gradient_difference` key) and
# "retrain" have no same-named entry there — confirm the runner maps them.
experiments:
  # TOFU
  tofu_experiments:
    enabled: true
    models:
      - "llama2-7b"
    scenarios:
      - "forget01"
      - "forget05"
      - "forget10"
    methods:
      - "ofmu"
      - "gradient_ascent"
      - "gradient_diff"
      - "npo"
      - "rmu"

  # WMDP
  wmdp_experiments:
    enabled: true
    models:
      - "llama2-7b"
    domains:
      - "bio"
      - "cyber"
      - "chem"
    methods:
      - "ofmu"
      - "gradient_ascent"

  # MUSE
  muse_experiments:
    # Enable if MUSE data available
    enabled: false
    models:
      - "llama2-7b"
    categories:
      - "books"
    methods:
      - "ofmu"

  # CIFAR
  cifar_experiments:
    enabled: true
    datasets:
      - "CIFAR10"
    models:
      - "resnet18"
    forget_classes: [1, 5]
    methods:
      - "ofmu"
      - "finetune"
      - "retrain"

# Logging and monitoring sinks
logging:
  # Console
  console_level: "INFO"

  # Log file
  file_logging: true
  log_file: "experiment.log"
  file_level: "DEBUG"

  # Weights & Biases
  wandb:
    # Set to true to enable W&B logging
    enabled: false
    project: "ofmu-experiments"
    # Your W&B username/organization
    entity: null
    tags:
      - "ofmu"
      - "unlearning"

  # TensorBoard
  tensorboard:
    enabled: true
    log_dir: "./logs/tensorboard"

# Output and results: where artifacts go, what is saved, and in which formats
output:
  # Directory layout
  base_dir: "./results"
  create_timestamp_dirs: true

  # Artifact switches
  save_model_checkpoints: true
  save_evaluation_results: true
  save_generation_samples: true
  save_analysis_plots: true

  # File formats for results and plots
  results_format:
    - "json"
    - "csv"
  plot_format:
    - "png"
    - "pdf"

  # Compression
  compress_outputs: false
  compression_format: "zip"

# Reproducibility
reproducibility:
  # NOTE(review): experiment.seed is also 42 — confirm which key is
  # authoritative and keep the two in sync.
  seed: 42
  deterministic_algorithms: true
  # NOTE(review): presumably maps to the cuDNN autotuner switch — confirm
  # against the consumer.
  benchmark_cudnn: false
  
# Resource management
resources:
  # Memory management
  # Quoted so the value stays a string; the "%" suffix is part of the value.
  max_memory_usage: "90%"
  clear_cache_frequency: 100  # steps
  
  # Parallel processing
  max_workers: 4
  multiprocessing_backend: "spawn"
  
  # Timeout settings
  experiment_timeout: 3600  # seconds per experiment (1 hour)
  evaluation_timeout: 600   # seconds per evaluation (10 minutes)

# Error handling
# Retry policy plus what to do once a failure is recorded.
error_handling:
  continue_on_error: true
  max_retries: 3
  retry_delay: 60  # seconds
  
  # Failure modes
  save_on_failure: true
  email_on_failure: false
  
# Advanced settings
advanced:
  # Profiling
  enable_profiling: false
  profile_memory: false
  
  # Debugging
  debug_mode: false
  verbose_logging: false
  
  # Experimental features
  use_flash_attention: false
  # NOTE(review): experiment.hardware already sets gradient_checkpointing and
  # mixed_precision (both true) — confirm which keys the consumer reads.
  use_gradient_checkpointing: true
  use_mixed_precision: true