"""Configuration dataclasses (model, training, hybrid RL, feedback,
experiment) and the default model presets built from them."""

from dataclasses import dataclass
from typing import Dict, List, Optional
import torch

@dataclass
class ModelConfig:
    """Settings for a single model/tokenizer pair.

    ``model_name`` and ``tokenizer_name`` have no defaults and must be
    supplied by the caller; every other field is optional.
    """

    # Required identifiers (no defaults).
    model_name: str
    tokenizer_name: str

    # Optional settings.
    max_length: int = 512
    # The default is computed once, when this class body executes: "cuda" if
    # torch reports CUDA as available at that moment, otherwise "cpu".
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    # Stored as a plain string; how it is resolved is up to the consumer
    # (not visible in this module).
    torch_dtype: str = "float16"
    use_device_map: bool = True

@dataclass
class TrainingConfig:
    """Training hyperparameters; every field has a default.

    How each value is consumed is decided by the training code, which is
    not part of this module.
    """

    # General settings.
    epochs: int = 20
    batch_size: int = 4
    learning_rate: float = 5e-6
    max_new_tokens: int = 100
    temperature: float = 0.7
    improvement_multiplier: float = 15

    # PPO-specific hyperparameters.
    clip_epsilon: float = 0.2
    value_loss_coef: float = 0.5
    entropy_coef: float = 0.01
    max_grad_norm: float = 1.0
    ppo_epochs: int = 4
    gamma: float = 0.99
    lam: float = 0.95
    
@dataclass
class HybridRLConfig:
    """Tunable coefficients for the hybrid RL component.

    The defaults satisfy ``min_alpha <= initial_alpha <= max_alpha``
    (0.1 <= 0.8 <= 0.9); nothing in this class enforces that ordering.
    """

    # Alpha start value and its bounds.
    initial_alpha: float = 0.8
    min_alpha: float = 0.1
    max_alpha: float = 0.9

    # Remaining coefficients; their exact use is defined by the consumer
    # (not visible in this module).
    temporal_decay: float = 0.4
    complexity_weight: float = 0.3
    confidence_weight: float = 0.2

@dataclass
class FeedbackConfig:
    """Parameters for the human and AI feedback sources.

    Fields prefixed ``human_`` belong to the human source and those
    prefixed ``ai_`` to the AI source; all have defaults.
    """

    # Human feedback source.
    human_expertise_level: float = 0.85
    human_inter_annotator_agreement: float = 0.75

    # AI feedback source.
    ai_bias_level: float = 0.03
    ai_uncertainty_threshold: float = 0.3
    
@dataclass
class ExperimentConfig:
    """Experiment-level settings; every field has a default."""

    max_samples: int = 100
    # Seed value only; nothing in this class seeds any RNG itself.
    random_seed: int = 42
    noise_level: float = 0.05
    
# Default presets, in order. Each one uses the same identifier for both the
# model and its tokenizer; all other ModelConfig fields keep their defaults.
DEFAULT_MODEL_CONFIGS = [
    ModelConfig(model_name=identifier, tokenizer_name=identifier)
    for identifier in (
        "meta-llama/Llama-2-7b-hf",
        "distilgpt2",
    )
]