import os
from dataclasses import dataclass
from typing import Optional

from src.dataset_processing.common.enums.dataset_types import DatasetType
from src.dataset_processing.common.enums.source_types import DatasetSourceType
from src.dataset_processing.perturbations.enums import PerturbationType

def get_data_path(path):
    """Return the value of the environment variable named by ``path``.

    Despite its name, ``path`` is the *name* of the environment variable to
    look up, not a filesystem path.

    Args:
        path: Name of the environment variable to read.

    Returns:
        The non-empty value stored in that environment variable.

    Raises:
        ValueError: If the variable is not set or is set to an empty string.
    """
    env_value = os.environ.get(path)
    if env_value:
        return env_value
    # An unset variable (None) and an empty string are both treated as missing.
    raise ValueError(f"Required environment variable '{path}' is missing or empty in .env file")

# Resolved once, when this module is imported. Importing the module raises
# ValueError if the DATA_PATH environment variable is unset or empty.
data_path = get_data_path('DATA_PATH')

@dataclass
class BaseDatasetConfig:
    """Base configuration shared by all datasets.

    ``dataset_type``, ``dataset_name`` and ``source_type`` have no default and
    must be supplied by the caller; every other field is optional.
    """
    # Which dataset this configuration describes (DatasetType enum member).
    dataset_type: DatasetType
    dataset_name: str
    # Source category (DatasetSourceType enum member). The raw/processed
    # subclasses in this module overwrite this value in __post_init__.
    source_type: DatasetSourceType
    # Defaults to the module-level ``data_path``, i.e. the value the DATA_PATH
    # environment variable had when this module was imported.
    base_dir: str = data_path
    # NOTE(review): presumably None means "use every entry" — confirm with consumers.
    num_entries: Optional[int] = None
    # NOTE(review): presumably the number of few-shot examples — confirm with consumers.
    num_shots: Optional[int] = 0
    # NOTE(review): presumably forces regeneration of already-processed data — confirm.
    force_reprocess: bool = False
    random_seed: int = 42
    
@dataclass
class BaseRawDatasetConfig(BaseDatasetConfig):
    """Base configuration shared by all raw datasets.

    Regardless of the values passed to the constructor, ``__post_init__``
    sets ``source_type`` to ``DatasetSourceType.RAW`` and resets both
    perturbation fields to their "no perturbation" defaults.
    """
    perturbation_type: PerturbationType = PerturbationType.NONE
    perturbation_intensity: int = 0
    
    def __post_init__(self) -> None:
        """Force the raw source type and clear any perturbation settings."""
        # Caller-supplied values for these three fields are overwritten here.
        self.source_type = DatasetSourceType.RAW
        self.perturbation_type = PerturbationType.NONE
        self.perturbation_intensity = 0
    
@dataclass
class BaseProcessedDatasetConfig(BaseDatasetConfig):
    """Base configuration shared by all processed datasets.

    Regardless of the value passed to the constructor, ``__post_init__`` sets
    ``source_type`` to ``DatasetSourceType.PROCESSED``. The perturbation
    fields are left exactly as supplied.
    """
    perturbation_type: PerturbationType = PerturbationType.NONE
    perturbation_intensity: int = 0
    
    def __post_init__(self) -> None:
        """Force the processed source type."""
        # Any caller-supplied source_type is overwritten here.
        self.source_type = DatasetSourceType.PROCESSED
