"""
Data configuration module
Manage parameter settings for different data types
"""

import os
import numpy as np

class DataConfig:
    """Parameter settings for one experiment data source.

    Two data sources are supported:

    * ``'synthetic'`` - fixed defaults, nothing is read from disk.
    * ``'real'`` - the same kind of defaults plus a ``real_theta`` array
      loaded from a ``.npy`` file; the feature dimension ``d`` is taken from
      the second axis of that array.

    Attributes set after construction:
        data_type: The data source this instance was created for.
        real_data_path: Explicit dataset path given by the caller, or None.
        dataset: Dataset name (``"synthetic"`` or ``"ml"``).
        is_empirical: 0 for synthetic data, 1 for real data.
        nu: Number of users.
        m: Number of clusters.
        d: Feature dimension.
        L: Number of items.
        T_list: List of time-step counts (5000, 10000, ..., 100000).
        real_theta: Loaded array (only present for ``'real'``).
    """

    # Default location of the real dataset. This is a relative path, so it is
    # resolved against the current working directory; pass ``real_data_path``
    # to the constructor when running from somewhere else.
    _DEFAULT_REAL_DATA_TEMPLATE = 'OffClusBandit/data/datasets/{dataset}_1000user_d20.npy'

    def __init__(self, data_type='synthetic', real_data_path=None):
        """
        Initialize data configuration

        Args:
            data_type: 'synthetic' or 'real'
            real_data_path: Optional path to the ``.npy`` file used when
                ``data_type`` is 'real'. When None, the built-in relative
                default path is used. Ignored for synthetic data.

        Raises:
            ValueError: If ``data_type`` is neither 'synthetic' nor 'real'.
            FileNotFoundError: If the real dataset file does not exist.
            RuntimeError: If the real dataset file cannot be loaded or does
                not have a second axis to read ``d`` from.
        """
        self.data_type = data_type
        self.real_data_path = real_data_path
        self._load_config()

    def _load_config(self):
        """Dispatch to the loader matching ``self.data_type``."""
        if self.data_type == 'synthetic':
            self._load_synthetic_config()
        elif self.data_type == 'real':
            self._load_real_config()
        else:
            raise ValueError(f"Unknown data_type: {self.data_type}")

    def _apply_shared_defaults(self):
        """Set the values that are identical for both data sources."""
        self.m = 10     # Number of clusters
        self.L = 20     # Number of items
        # A fresh list per instance so callers may mutate it safely.
        self.T_list = [5000 * i for i in range(1, 21)]  # Time steps list

    def _print_summary(self, title, d_note=""):
        """Print the loaded settings; ``d_note`` is appended to the ``d`` line."""
        print(f"{title} data config loaded:")
        print(f"  - dataset: {self.dataset}")
        print(f"  - is_empirical: {self.is_empirical}")
        print(f"  - nu: {self.nu}")
        print(f"  - m: {self.m}")
        print(f"  - d: {self.d}{d_note}")
        print(f"  - L: {self.L}")
        print(f"  - T_list: {self.T_list}")

    def _load_synthetic_config(self):
        """Load configuration for synthetic data"""
        self.dataset = "synthetic"
        self.is_empirical = 0
        self.nu = 100   # Reduce number of users to lower memory usage
        self.d = 20     # Feature dimension
        self._apply_shared_defaults()

        self._print_summary("Synthetic")

    def _load_real_config(self):
        """Load configuration for real datasets.

        Raises:
            FileNotFoundError: If the dataset file does not exist.
            RuntimeError: If the file cannot be loaded, or the loaded object
                has fewer than two dimensions.
        """
        self.dataset = "ml"
        self.is_empirical = 1
        self.nu = 1000  # Number of users
        self._apply_shared_defaults()

        # Prefer the caller-supplied path; otherwise fall back to the default
        # file inside the package datasets directory.
        data_path = self.real_data_path
        if data_path is None:
            data_path = self._DEFAULT_REAL_DATA_TEMPLATE.format(dataset=self.dataset)
        if not os.path.exists(data_path):
            raise FileNotFoundError(f"Real data file not found: {data_path}")

        # Only the load itself is guarded, so unrelated errors are not
        # relabelled as data-loading failures. The original exception is kept
        # as the explicit cause.
        try:
            theta = np.load(data_path)
        except Exception as e:
            raise RuntimeError(f"Error loading real data: {e}") from e

        # ``d`` is read from axis 1, so anything without a second axis
        # (or without array attributes at all) cannot be used.
        if getattr(theta, 'ndim', 0) < 2:
            raise RuntimeError(
                "Error loading real data: expected an array with at least "
                f"2 dimensions, got shape {getattr(theta, 'shape', None)}"
            )

        self.real_theta = theta
        self.d = theta.shape[1]  # Get feature dimension from data

        self._print_summary("Real", d_note=" (from data)")
        print(f"  - real_theta shape: {self.real_theta.shape}")

    def get_environment_params(self, seed=None):
        """Get environment parameters.

        Args:
            seed: Seed to report for synthetic data. When None, a random
                integer in [1, 10000) is drawn from NumPy's global generator.
                Ignored for real data, which always reports seed 42.

        Returns:
            dict with keys 'L', 'd', 'm', 'num_users', 'setting', 'K' and
            'seed'; real data additionally carries 'real_theta'.
        """
        is_synthetic = self.data_type == 'synthetic'

        if not is_synthetic:
            # Use fixed seed for real data
            seed = 42
        elif seed is None:
            # Use random seed for synthetic data
            seed = np.random.randint(1, 10000)

        params = {
            'L': self.L,
            'd': self.d,
            'm': self.m,
            'num_users': self.nu,
        }
        if not is_synthetic:
            params['real_theta'] = self.real_theta
        # Added last so the key order matches the historical layout.
        params.update(setting='clustered', K=3, seed=seed)
        return params

    def get_algorithm_params(self):
        """Get algorithm parameters.

        Returns:
            dict with keys 'nu', 'd', 'T', 'ni', 'K', 'eta' and 'setting'.
            'ni' is filled from ``self.L``; 'T', 'K' and 'eta' are fixed
            constants.
        """
        return {
            'nu': self.nu,
            'd': self.d,
            'T': 1000,
            'ni': self.L,
            'K': 3,
            'eta': 0.02,
            'setting': 'clustered'
        }

    def get_experiment_params(self):
        """Get experiment parameters.

        Returns:
            dict with keys 'dataset', 'is_empirical', 'nu', 'd', 'm', 'L'
            and 'T_list', copied from the instance attributes. 'T_list' is
            the instance's own list, not a copy.
        """
        return {
            'dataset': self.dataset,
            'is_empirical': self.is_empirical,
            'nu': self.nu,
            'd': self.d,
            'm': self.m,
            'L': self.L,
            'T_list': self.T_list
        }

def create_data_config(data_type='synthetic'):
    """
    Build the configuration object for the requested data source.

    Thin factory around the DataConfig constructor; any exception raised
    while constructing the configuration propagates to the caller.

    Args:
        data_type: 'synthetic' or 'real'

    Returns:
        The newly constructed DataConfig instance
    """
    config = DataConfig(data_type=data_type)
    return config

def demo_config():
    """Print the parameter dictionaries for both data sources.

    Each data source is handled independently: a failure while building or
    querying one configuration is reported on stdout and does not stop the
    other from being shown.
    """
    print("=== DataConfig Demo ===")

    # (section header, data_type) in the order they are displayed.
    sections = (
        ("\n--- Synthetic Data Config ---", 'synthetic'),
        ("\n--- Real Data Config ---", 'real'),
    )

    for header, kind in sections:
        print(header)
        try:
            config = create_data_config(kind)
            # Collect all three dictionaries before printing any of them,
            # so a failing getter suppresses the whole section's output.
            reports = (
                ("Environment params", config.get_environment_params()),
                ("Algorithm params", config.get_algorithm_params()),
                ("Experiment params", config.get_experiment_params()),
            )
            for label, values in reports:
                print(f"{label}: {values}")
        except Exception as e:
            print(f"Error with {kind} config: {e}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_config()