import os
import json
import numpy as np

def symlink(json_path, save_path, mode="worst"):
    """Symlink the best or worst 10% of images listed in a score file.

    Reads ``json_path`` (a JSON object mapping image path -> numeric score),
    selects the images scoring at or below the 10th percentile
    (``mode="worst"``) or at or above the 90th percentile (``mode="best"``),
    and links each one into ``<save_path>/<mode>/`` under its base name.

    Args:
        json_path: Path to the JSON score file.
        save_path: Directory under which the ``<mode>`` subfolder is created.
        mode: Either ``"worst"`` or ``"best"``.

    Returns:
        The number of images selected. Links that already exist are left
        untouched but are still counted. An empty score file yields 0.

    Raises:
        ValueError: If ``mode`` is neither ``"worst"`` nor ``"best"``.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    if mode not in ("worst", "best"):
        raise ValueError("Mode must be 'worst' or 'best'.")

    # A percentile of zero scores is undefined (NumPy raises or yields NaN
    # depending on version), so an empty file simply selects nothing.
    if not data:
        return 0

    scores = np.array(list(data.values()))
    if mode == "worst":
        threshold = np.percentile(scores, 10)  # 10th percentile
        selected_images = [img for img, score in data.items() if score <= threshold]
    else:
        threshold = np.percentile(scores, 90)  # 90th percentile
        selected_images = [img for img, score in data.items() if score >= threshold]

    target_dir = os.path.join(save_path, mode)
    os.makedirs(target_dir, exist_ok=True)
    for img_path in selected_images:
        link_name = os.path.join(target_dir, os.path.basename(img_path))
        # lexists (unlike exists) also reports dangling links, which would
        # otherwise make os.symlink raise FileExistsError on a re-run.
        if not os.path.lexists(link_name):
            # NOTE: img_path is used verbatim as the link target, so a
            # relative path is resolved relative to the link's directory.
            os.symlink(img_path, link_name)

    return len(selected_images)

def compute_stats(json_path):
    """Summarise the scores stored in a JSON file of {image: score}.

    Returns a dict with the keys "mean", "std" and "top_10_percentile"
    (the 90th-percentile score, i.e. the cut-off for the top 10%).
    All three values are None when the file holds no entries.
    """
    stat_names = ("mean", "std", "top_10_percentile")

    with open(json_path, 'r') as handle:
        score_by_image = json.load(handle)

    # Nothing to summarise: report every statistic as missing.
    if not score_by_image:
        return dict.fromkeys(stat_names)

    values = np.array(list(score_by_image.values()))
    raw_stats = (
        np.mean(values),
        np.std(values),
        np.percentile(values, 90),  # score at which the top 10% begins
    )
    # Convert NumPy scalars to plain floats so the result is JSON-serialisable.
    return {name: float(stat) for name, stat in zip(stat_names, raw_stats)}

def main(root_folder="/ROOT"):
    """Process every subfolder of ``root_folder`` and write a stats summary.

    For each immediate subdirectory:
      * if ``aesthetic.json`` exists, ``symlink`` is called for the "worst"
        and "best" modes and the returned count is printed;
      * statistics are computed via ``compute_stats`` for ``aesthetic.json``
        and ``corrupt.json`` (``None`` is recorded for a missing file).

    The collected statistics are written to
    ``<root_folder>/stats_summary.json`` with this structure::

        {
          "subfolder_name": {
              "aesthetic": {"mean": ..., "std": ..., "top_10_percentile": ...},
              "corrupt": {"mean": ..., "std": ..., "top_10_percentile": ...}
          },
          ...
        }

    Args:
        root_folder: Directory whose subfolders are scanned. Defaults to
            "/ROOT", the location the script previously hard-coded.
    """
    folder_stats = {}

    # os.listdir returns entries in arbitrary order; sort so that the console
    # output and the summary file are reproducible between runs.
    for subfolder in sorted(os.listdir(root_folder)):
        full_path = os.path.join(root_folder, subfolder)
        if not os.path.isdir(full_path):
            continue

        # Paths to JSON files
        aesthetic_json = os.path.join(full_path, "aesthetic.json")
        corrupt_json = os.path.join(full_path, "corrupt.json")
        has_aesthetic = os.path.exists(aesthetic_json)

        # Link the worst/best aesthetic images next to the score file.
        if has_aesthetic:
            for mode in ("worst", "best"):
                num_symlinks = symlink(aesthetic_json, full_path, mode=mode)
                print(f"Created {num_symlinks} symlinks for {subfolder} in {mode} mode.")

        # A missing score file is recorded as None rather than omitted, so
        # every subfolder entry has the same two keys.
        folder_stats[subfolder] = {
            "aesthetic": compute_stats(aesthetic_json) if has_aesthetic else None,
            "corrupt": (
                compute_stats(corrupt_json) if os.path.exists(corrupt_json) else None
            ),
        }

    # Write out the statistics to a JSON file in the root folder
    summary_path = os.path.join(root_folder, "stats_summary.json")
    with open(summary_path, "w") as f:
        json.dump(folder_stats, f, indent=4)

    print(f"\nStatistics summary saved to: {summary_path}")

# Run the folder scan only when this file is executed as a script, so that
# importing the module for its helper functions has no side effects.
if __name__ == "__main__":
    main()
