import os
import json
import random
from tqdm import tqdm

def parse_expw(label_file_path, image_path, output_json_path, min_size=1000, max_size=2000):
    """
    Parses the label.lst file and appends a random, per-emotion-capped sample to a JSON file.

    The label lines are shuffled and then accepted in order until each emotion
    holds ``max_size`` entries. Results are appended through ``save_to_json``
    in at most two batches: one the first time every emotion has reached
    ``min_size`` entries, and a final one holding whatever was collected
    afterwards (or everything, if not every emotion reached ``min_size``).

    Args:
        label_file_path (str): Path to the label.lst file.
        image_path (str): Path to the directory containing emotion subdirectories.
            Not used by this function; kept so existing callers keep working.
        output_json_path (str): Path to save the output JSON file.
        min_size (int): Per-emotion count that triggers the first batch once
            every emotion has reached it.
        max_size (int): Maximum number of entries per emotion.

    Returns:
        None
    """
    # Emotion label mapping
    emotion_map = {
        "0": "angry",
        "1": "disgust",
        "2": "fear",
        "3": "happy",
        "4": "sad",
        "5": "surprise",
        "6": "neutral"
    }

    # Pending (not yet written) entries and running totals per emotion
    data_by_emotion = {emotion: [] for emotion in emotion_map.values()}
    emotion_counts = {emotion: 0 for emotion in emotion_map.values()}

    # Read everything up front so the file is closed before the long loop
    with open(label_file_path, 'r') as file:
        lines = file.readlines()
    random.shuffle(lines)  # Shuffle the lines for random selection

    # The counts only ever grow, so "every emotion >= min_size" stays true once
    # reached. Flushing on that condition for every row would append one tiny
    # batch per accepted line, so the threshold batch is written exactly once.
    min_batch_saved = False

    for line in tqdm(lines, desc="Processing labels"):
        parts = line.strip().split()
        if len(parts) != 8:
            continue  # Skip invalid lines

        image_name, _face_id, top, left, right, bottom, _confidence, emotion_label = parts

        # Unknown labels map to None, which is never a key of data_by_emotion
        emotion = emotion_map.get(emotion_label)
        if emotion not in data_by_emotion or emotion_counts[emotion] >= max_size:
            continue  # Skip unknown labels and emotions that are already full

        # Append the data as a compact list: [image_path, top, left, right, bottom, source]
        data_by_emotion[emotion].append([
            os.path.join(emotion, image_name),
            int(top),
            int(left),
            int(right),
            int(bottom),
            "expw"  # Add the source attribute
        ])
        emotion_counts[emotion] += 1

        if not min_batch_saved and all(count >= min_size for count in emotion_counts.values()):
            # Write the first batch and keep collecting only for emotions below max_size
            save_to_json(data_by_emotion, output_json_path)
            data_by_emotion = {
                name: [] for name in emotion_map.values() if emotion_counts[name] < max_size
            }
            min_batch_saved = True

        if all(count >= max_size for count in emotion_counts.values()):
            break  # Nothing more can be accepted; no need to scan the remaining lines

    # Save any remaining data
    save_to_json(data_by_emotion, output_json_path)

def parse_rafdb(root_dir, output_json_path, min_size=300, max_size=600):
    """
    Samples images from the RAFDB dataset and appends them to the JSON file.

    Each recognised emotion folder under ``<root_dir>/data`` contributes a
    random sample of at most ``max_size`` images; folders holding fewer than
    ``min_size`` images are skipped entirely. Images without a usable bounding
    box file under ``<root_dir>/Annotation/boundingbox`` are skipped with a
    message.

    Args:
        root_dir (str): Path to the RAFDB dataset root directory.
        output_json_path (str): Path to the JSON file to save the output.
        min_size (int): Minimum number of images a folder must contain to be used.
        max_size (int): Maximum number of images sampled per folder.

    Returns:
        None
    """
    # RAFDB folder name -> emotion name used in the output
    folder_to_label = {
        "anger": "angry",
        "disgust": "disgust",
        "fear": "fear",
        "happiness": "happy",
        "neutral": "neutral",
        "sadness": "sad",
        "surprise": "surprise"
    }

    # One (initially empty) bucket per output emotion
    collected = {label: [] for label in folder_to_label.values()}

    images_root = os.path.join(root_dir, "data")
    boxes_root = os.path.join(root_dir, "Annotation", "boundingbox")

    for folder in os.listdir(images_root):
        folder_path = os.path.join(images_root, folder)

        # Only directories whose name is in the mapping are processed
        label = folder_to_label.get(folder) if os.path.isdir(folder_path) else None
        if not label:
            continue

        candidates = [name for name in os.listdir(folder_path) if name.endswith(('.jpg', '.png'))]
        if len(candidates) < min_size:
            print(f"Skipping {folder} as it has less than {min_size} images.")
            continue

        # Random sample capped at max_size (the whole folder if it is smaller)
        sample_count = min(len(candidates), max_size)

        for image_name in random.sample(candidates, sample_count):
            # The box file is named after the image's base name
            stem = os.path.splitext(image_name)[0]
            bbox_file = os.path.join(boxes_root, f"{stem}_boundingbox.txt")
            if not os.path.exists(bbox_file):
                print(f"Bounding box file not found for {image_name}, skipping.")
                continue

            # Only the first line of the box file is used
            with open(bbox_file, 'r') as handle:
                fields = handle.readline().strip().split()

            if len(fields) != 4:
                print(f"Invalid bounding box format for {image_name}, skipping.")
                continue

            # NOTE(review): the four values are read as top, left, right, bottom;
            # confirm this order against the RAFDB annotation format.
            top, left, right, bottom = (float(value) for value in fields)

            # Compact record: [image_path, top, left, right, bottom, source]
            collected[label].append([
                os.path.join(folder, image_name),
                int(top),
                int(left),
                int(right),
                int(bottom),
                "rafdb",
            ])

    # Append everything as a single batch
    save_to_json(collected, output_json_path)

def save_to_json(data, output_json_path):
    """
    Appends one batch to the output file as a single JSON line.

    The file is opened in append mode, so existing content is preserved and
    each call adds one JSON document followed by a newline.

    Args:
        data (dict): The batch to serialize.
        output_json_path (str): Path to the JSON file.

    Returns:
        None
    """
    with open(output_json_path, 'a') as sink:
        json.dump(data, sink)
        # Terminate the batch so the next one starts on its own line
        print(file=sink)

if __name__ == "__main__":
    # Dataset locations; both parsers append to the same output file.
    root = "/fs/ess/PAS2099/sooyoung/vfm_dataset/emotion/data"
    output_json_path = f"{root}/processed_data.json"

    # EXPW inputs
    label_file_path = f"{root}/Expw_original/label/label.lst"
    image_path = f"{root}/Expw_original/output"

    # Paths for the RAFDB dataset
    rafdb_root = "/fs/ess/PAS2099/sooyoung/vfm_dataset/emotion/data/RAF"

    # Each step is (parser, arguments, message printed on success).
    steps = (
        (
            parse_expw,
            (label_file_path, image_path, output_json_path),
            f"EXPW data successfully saved to {output_json_path}",
        ),
        (
            parse_rafdb,
            (rafdb_root, output_json_path),
            f"RAFDB data successfully added to {output_json_path}",
        ),
    )

    # Steps run independently: a failure in one is reported and the next still runs.
    for run, args, done_message in steps:
        try:
            run(*args)
            print(done_message)
        except Exception as e:
            print(f"Error: {e}")