# Code encoding module for EHR data
# Handles conversion of medical codes to numerical indices

from collections import OrderedDict

from preprocess.parse_csv import EHRParser


def encode_code(patient_admission, admission_codes):
    """
    Convert medical codes to numerical indices.

    Indices are assigned starting from 1, in the order codes are first
    encountered while walking ``patient_admission`` (patients in dict order,
    then each patient's admissions, then each admission's codes).

    Args:
        patient_admission: Dict mapping patient IDs to their admission records.
            Each record is indexed with ``EHRParser.adm_id_col`` to obtain
            its admission ID.
        admission_codes: Dict mapping admission IDs to their medical codes
    Returns:
        admission_codes_encoded: Dict mapping every admission ID in
            ``admission_codes`` to a sorted, duplicate-free list of code indices
        code_map: OrderedDict mapping original codes to their numerical indices
    Raises:
        KeyError: If an admission referenced by ``patient_admission`` is
            missing from ``admission_codes``, or if ``admission_codes``
            contains a code that no admission in ``patient_admission`` uses
            (such a code never receives an index).
    """
    # Create mapping of unique codes to indices.
    # Only the admission lists are needed here; patient IDs are irrelevant.
    code_map = OrderedDict()
    for admissions in patient_admission.values():
        for admission in admissions:
            for code in admission_codes[admission[EHRParser.adm_id_col]]:
                if code not in code_map:
                    # Start from 1 (presumably 0 is reserved by consumers,
                    # e.g. for padding -- TODO confirm).
                    code_map[code] = len(code_map) + 1

    # Convert codes to indices for each admission. Duplicates are collapsed
    # via a set; sorting makes the resulting order deterministic instead of
    # depending on the set's internal hash-table layout.
    admission_codes_encoded = {
        admission_id: sorted({code_map[code] for code in codes})
        for admission_id, codes in admission_codes.items()
    }
    return admission_codes_encoded, code_map
