#!/usr/bin/env python3
"""Simple loader that reads AVMNIST metadata and train/test labels and prints them.

Usage:
  python 02_paper_experiments/analysis/053_audio_analysis.py

The data directory defaults to `01_data/processed/avmnist`; pass `--data-dir` to override it.
"""
import os
import json
import ast
import argparse
import numpy as np


def find_file(datadir, filename):
    """Locate ``filename`` under ``datadir``.

    Two locations are checked in order: ``datadir`` itself, then its
    ``avmnist_temp`` subdirectory.

    Returns:
        The first candidate path that exists, or ``None`` if neither does.
    """
    candidates = (
        os.path.join(datadir, filename),
        os.path.join(datadir, "avmnist_temp", filename),
    )
    return next((c for c in candidates if os.path.exists(c)), None)


def load_meta(datadir):
    """Load and parse the metadata file located directly inside ``datadir``.

    The names ``audioMNIST_meta.txt``, ``audioMNIST_meta.json`` and
    ``audioMNIST_meta`` are checked in that order; only the first one that
    exists is read. Its text is parsed by trying, in turn:

    1. JSON,
    2. a Python literal (``ast.literal_eval``),
    3. JSON again after wrapping the stripped text in ``{...}`` when it does
       not already start with ``{``.

    Returns:
        Whatever object the first successful parser produces.

    Raises:
        RuntimeError: the file exists but none of the parsers accept it.
        FileNotFoundError: none of the candidate files exist.
    """
    for fname in ("audioMNIST_meta.txt", "audioMNIST_meta.json", "audioMNIST_meta"):
        path = os.path.join(datadir, fname)
        if not os.path.exists(path):
            continue

        # Context manager so the handle is closed even if read() fails.
        with open(path, "r", encoding="utf-8") as handle:
            text = handle.read()

        # Try JSON first (json.JSONDecodeError is a ValueError subclass).
        try:
            return json.loads(text)
        except (ValueError, RecursionError):
            pass

        # Fall back to a Python literal; these are the exceptions
        # ast.literal_eval is documented to raise on malformed input.
        try:
            return ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            pass

        # Last resort: treat the content as the inside of a JSON object.
        wrapped = text.strip()
        if not wrapped.startswith("{"):
            wrapped = "{" + wrapped + "}"
        try:
            return json.loads(wrapped)
        except (ValueError, RecursionError) as err:
            raise RuntimeError(f"Unable to parse metadata file: {path}") from err

    raise FileNotFoundError(f"No metadata file found in {datadir}")


def load_labels(datadir, split_name):
    """Load the ``<split_name>_labels.npy`` array for one data split.

    The file is located with ``find_file``, i.e. in ``datadir`` or in its
    ``avmnist_temp`` subdirectory.

    Returns:
        The object produced by ``np.load`` for the located file.

    Raises:
        FileNotFoundError: the labels file is in neither location.
    """
    fname = f"{split_name}_labels.npy"
    located = find_file(datadir, fname)
    if located is not None:
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code from an untrusted file. Only use
        # this on label files from a trusted source.
        return np.load(located, allow_pickle=True)
    raise FileNotFoundError(f"Labels file not found for '{split_name}' (looked for {fname} in {datadir} and avmnist_temp)")


def _meta_sort_key(key):
    """Return a type-stable sort key: numeric keys first, then the rest."""
    text = str(key)
    if text.isdigit():
        try:
            return (0, int(text), "")
        except ValueError:
            # isdigit() accepts characters (e.g. superscripts) int() rejects.
            pass
    return (1, 0, text)


def print_meta(meta):
    """Print the metadata under a ``--- Metadata ---`` header.

    For a dict, one ``key: value`` line is printed per entry. Keys whose
    string form is all digits come first in numeric order; all other keys
    follow, ordered by their string form. Using a tuple key keeps the sort
    well-defined when both kinds of key are present (comparing a bare int
    against a bare str would raise ``TypeError``).

    Any non-dict value is printed with ``repr``.
    """
    print("--- Metadata ---")
    if not isinstance(meta, dict):
        print(repr(meta))
        return
    for k in sorted(meta, key=_meta_sort_key):
        print(f"{k}: {meta[k]}")


def print_labels(arr, name):
    """Print a labels array together with a short summary.

    Output, in order:

    * a ``--- <name> labels ---`` header,
    * the ``shape`` and ``dtype`` attributes (``None`` when absent),
    * the values: the whole object when it has no length or at most 2000
      entries, otherwise only the first and last 100 entries,
    * the unique values with their counts, when ``np.unique`` can compute
      them; otherwise a one-line note explaining why they are missing.
    """
    print(f"--- {name} labels ---")
    print(f"shape: {getattr(arr, 'shape', None)}, dtype: {getattr(arr, 'dtype', None)}")

    # len() raises TypeError for unsized objects such as 0-d numpy arrays;
    # treat those as "no length" and print them whole.
    try:
        length = len(arr)
    except TypeError:
        length = None

    if length is None or length <= 2000:
        print(arr)
    else:
        print("[array too long; showing head (100) and tail (100)]")
        print(arr[:100])
        print("...")
        print(arr[-100:])

    # Basic stats are best-effort: report a failure instead of hiding it,
    # but never let it abort the printout.
    try:
        unique, counts = np.unique(arr, return_counts=True)
    except Exception as exc:
        print(f"unique values: unavailable ({exc})")
        return
    print("unique values:")
    for u, c in zip(unique, counts):
        print(f"  {u}: {c}")


def main():
    """Command-line entry point: print the metadata and the split labels.

    Reads ``--data-dir`` from the command line, prints the parsed metadata,
    then prints the ``train`` and ``test`` label arrays. A split whose labels
    file is missing is reported with the error message and skipped.
    """
    parser = argparse.ArgumentParser(description="Load and print AVMNIST metadata and labels")
    parser.add_argument("--data-dir", default="01_data/processed/avmnist", help="Path to AVMNIST processed data")
    datadir = parser.parse_args().data_dir
    print(f"Using data dir: {datadir}")

    print_meta(load_meta(datadir))

    # Each split is handled independently so one missing file does not
    # prevent the other from being shown.
    for split in ("train", "test"):
        try:
            labels = load_labels(datadir, split)
        except FileNotFoundError as missing:
            print(str(missing))
        else:
            print_labels(labels, split)


if __name__ == "__main__":
    main()
