# global_label_converter.py

"""
Global Label Converter

This module provides the GlobalLabelConverter class, which is responsible for loading
activity label configurations from a YAML file, converting them into Pandas DataFrames
for efficient querying, and transforming NumPy arrays of labels into their corresponding
merged labels.

Features:
1. Load and parse configuration from a YAML file.
2. Convert activity labels into table (Pandas DataFrame) format for efficient querying.
3. Convert a NumPy array of labels to an array of merged labels based on the configuration.
"""

import os
import yaml
import pandas as pd
from utils.path_utils import get_directory_path

class GlobalLabelConverter:
    """
    Map per-dataset activity labels to their merged labels.

    The converter loads a YAML configuration, turns every top-level entry
    (one per dataset) into a Pandas DataFrame indexed by the stringified
    'label' field, and uses those tables to translate label arrays into the
    values stored in the 'merged label' column.

    Attributes set by ``__init__``:
        config_path: Full path of the YAML configuration file.
        config: Parsed configuration (mapping of dataset name -> activities).
        dataset_tables: Mapping of dataset name -> DataFrame indexed by label.
    """

    def __init__(self, config_filename):
        """
        Initialize the GlobalLabelConverter.

        The configuration is loaded and the lookup tables are built eagerly,
        so any configuration problem surfaces at construction time.

        :param config_filename: Name of the configuration file, e.g., 'merged_activity_labels.yaml'.
            It is joined onto the directory returned by ``get_directory_path("configs")``.
        """
        self.config_path = os.path.join(get_directory_path("configs"), config_filename)
        self.config = self.load_config()
        self.dataset_tables = self.build_dataset_tables()

    def load_config(self):
        """
        Load and parse the YAML configuration file.

        :return: Parsed configuration as a dictionary (empty if the file is empty)
        :raises FileNotFoundError: If the configuration file does not exist
        :raises yaml.YAMLError: If there is an error parsing the YAML file
        :raises ValueError: If the top level of the YAML document is not a mapping
        """
        if not os.path.exists(self.config_path):
            raise FileNotFoundError(f"Configuration file not found: {self.config_path}")

        try:
            with open(self.config_path, 'r', encoding='utf-8') as file:
                config = yaml.safe_load(file)
        except yaml.YAMLError as e:
            # Chain the original error so its details (e.g. position marks) stay reachable.
            raise yaml.YAMLError(f"Error parsing YAML file: {e}") from e

        # safe_load returns None for an empty document; treat that as
        # "no datasets configured" instead of failing later on None.items().
        if config is None:
            return {}
        if not isinstance(config, dict):
            raise ValueError(
                f"Configuration file must contain a mapping of dataset names "
                f"to activities, got {type(config).__name__}: {self.config_path}"
            )
        return config

    def build_dataset_tables(self):
        """
        Convert each dataset's activities into a Pandas DataFrame for efficient querying.

        Every value in ``self.config`` is passed to ``pd.DataFrame``; the
        resulting 'label' column is cast to ``str`` and becomes the index.
        (Presumably each dataset is a list of activity records carrying at
        least 'label' and 'merged label' fields -- verify against the YAML.)

        :return: Dictionary mapping dataset names to their corresponding DataFrames
        :raises KeyError: If a dataset's activities have no 'label' field
        """
        tables = {}
        for dataset_name, activities in self.config.items():
            df = pd.DataFrame(activities)
            if 'label' not in df.columns:
                # Name the offending dataset rather than surfacing a bare KeyError('label').
                raise KeyError(
                    f"Dataset '{dataset_name}' has no 'label' field in the configuration."
                )
            # Labels are compared as strings so that YAML ints and array
            # values are looked up the same way.
            df['label'] = df['label'].astype(str)
            # Index by label for direct lookup in convert_labels.
            tables[dataset_name] = df.set_index('label')
        return tables

    def convert_labels(self, dataset_name, labels_array):
        """
        Convert a NumPy array of labels to their corresponding merged labels.

        Labels are stringified before lookup, matching how the table index
        is built. The result has one merged label per input label, in the
        same order.

        :param dataset_name: Name of the dataset, e.g., 'dsads'
        :param labels_array: NumPy 1D array of labels
        :return: NumPy array of merged labels
        :raises KeyError: If the dataset, its 'merged label' column, or any label does not exist
        :raises ValueError: If a requested label is defined more than once for the dataset
        """
        # Validate the dataset first so no work is done for an unknown name.
        if dataset_name not in self.dataset_tables:
            raise KeyError(f"Dataset '{dataset_name}' not found in the configuration.")

        dataset_df = self.dataset_tables[dataset_name]
        # Checked explicitly: otherwise a missing column would be reported
        # as missing labels, which is misleading.
        if 'merged label' not in dataset_df.columns:
            raise KeyError(
                f"Dataset '{dataset_name}' has no 'merged label' column in the configuration."
            )

        label_series = pd.Series(labels_array).astype(str)
        requested = set(label_series)

        # Sorted so the error message is deterministic.
        missing_labels = sorted(requested - set(dataset_df.index))
        if missing_labels:
            raise KeyError(f"Labels {missing_labels} not found in dataset '{dataset_name}'.")

        # .loc on a non-unique index returns every matching row, which would
        # silently yield more outputs than inputs; refuse ambiguous lookups.
        duplicated_index = dataset_df.index[dataset_df.index.duplicated()]
        ambiguous_labels = sorted(requested.intersection(duplicated_index))
        if ambiguous_labels:
            raise ValueError(
                f"Labels {ambiguous_labels} are defined more than once in dataset '{dataset_name}'."
            )

        # Vectorized lookup: one merged label per input label, order preserved.
        return dataset_df.loc[label_series, 'merged label'].values

    def __repr__(self):
        """Return a short representation showing the configuration path."""
        return f"GlobalLabelConverter(config_path='{self.config_path}')"
