import os

import pandas as pd

class Metabric:
    """Loader and minimal preprocessor for the METABRIC dataset CSV.

    The CSV is read once, at construction time. The names of the target and
    predictor columns in the raw file are exposed through read-only
    properties, and :meth:`preprocess` builds a model-ready frame from them.
    """

    def __init__(self, path='data/files/metabric.csv'):
        """Read the dataset from ``path`` into memory.

        Args:
            path: Location of the CSV file. Defaults to
                ``'data/files/metabric.csv'``, resolved against the current
                working directory.

        Raises:
            FileNotFoundError: If no file exists at ``path``. The original
                error raised by pandas is attached as ``__cause__``.
        """
        try:
            self._data = pd.read_csv(path)
        except FileNotFoundError as err:
            # Build the hint from the requested path so it stays accurate for
            # non-default locations; for the default path the text is
            # unchanged from the historical message.
            directory, filename = os.path.split(os.fspath(path))
            directory = directory or '.'
            raise FileNotFoundError(
                f"Please ensure '{filename}' is in the '{directory}/' directory."
            ) from err

    @property
    def event(self):
        """Name of the event column in the raw data."""
        return 'e'

    @property
    def time(self):
        """Name of the time column in the raw data."""
        return 't'

    @property
    def continuous_predictors(self):
        """Names of the predictor columns that are passed through unchanged."""
        return ['x00', 'x01', 'x02', 'x03', 'x08']

    @property
    def categorical_predictors(self):
        """Names of the predictor columns that are one-hot encoded."""
        return ['x04', 'x05', 'x06', 'x07']

    @property
    def data(self):
        """The raw DataFrame as loaded from the CSV (not a copy)."""
        return self._data

    def preprocess(self):
        """
        Performs minimal preprocessing on the METABRIC dataset.

        This involves only the most essential steps for data usability:
        1.  **Standardizing Names**: Renames target columns to 'event' and 'time'.
        2.  **Encoding Categorical Features**: One-hot encodes every column in
            ``categorical_predictors`` (whatever its dtype), dropping the
            first level of each.

        Note: Scaling and missing value imputation are NOT performed.
        The stored raw data is left untouched.

        Returns:
            pd.DataFrame: A minimally processed DataFrame with columns
            'event', 'time', the continuous predictors, then the dummy
            columns.
        """
        # 1. Rename the target columns; ``rename`` returns a new frame, so the
        #    raw data held by this instance is not modified.
        df = self._data.rename(columns={self.event: 'event', self.time: 'time'})

        # 2. One-hot encode the categorical features.
        # ``columns=`` is required: without it pandas only encodes
        # object/string/category columns, so numerically coded categories are
        # skipped and the ``prefix`` list no longer matches, raising
        # ValueError.
        categorical = self.categorical_predictors
        df_encoded = pd.get_dummies(
            df[categorical],
            columns=categorical,
            prefix=categorical,
            drop_first=True,
        )

        # 3. Combine targets, the original (unscaled) continuous predictors
        #    and the dummy columns into the final frame.
        passthrough = df[['event', 'time'] + self.continuous_predictors]
        return pd.concat([passthrough, df_encoded], axis=1)

# Demo: load the dataset and show it before and after minimal preprocessing.
if __name__ == '__main__':
    dataset = Metabric()

    print("--- Original Data ---")
    print(dataset.data.head())

    # Run the minimal preprocessing step (renaming + one-hot encoding only).
    minimal_frame = dataset.preprocess()

    # Visual separator: blank line, a 30-character rule, blank line.
    print("", "=" * 30, "", sep="\n")

    print("--- Processed Data (No Scaling) ---")
    print(minimal_frame.head())