import os
import json
import glob
import re

# --- Score converters ---
def rescale_offer(x):
    """Linearly rescale a percentage-style score onto a 1-to-7 range.

    The input is divided by 100, stretched by a factor of 6 and shifted by 1,
    so 0 maps to 1.0 and 100 maps to 7.0. Inputs outside 0-100 are
    extrapolated, not clamped.

    Args:
        x: Numeric score to rescale.

    Returns:
        The rescaled score as a float rounded to two decimal places.
    """
    fraction = x / 100
    scaled = fraction * 6 + 1
    return round(scaled, 2)

def round_to_quartile(x):
    """Snap a percentage-style score to the nearest multiple of 0.25.

    The input is first divided by 100, then expressed as a whole number of
    0.25-wide steps. Exact ties follow the built-in ``round`` (half to even),
    e.g. 12.5 -> 0.0 and 37.5 -> 0.5.

    Args:
        x: Numeric score to quantize.

    Returns:
        The quantized value as a float rounded to two decimal places.
    """
    step = 0.25
    step_count = round(x / 100 / step)
    return round(step_count * step, 2)

def round_to_decile(x):
    """Snap a percentage-style score to the nearest multiple of 0.1.

    The input is first divided by 100, then expressed as a whole number of
    0.1-wide steps. The final two-decimal rounding removes floating-point
    residue such as 0.30000000000000004.

    Args:
        x: Numeric score to quantize.

    Returns:
        The quantized value as a float rounded to two decimal places.
    """
    step = 0.1
    step_count = round(x / 100 / step)
    return round(step_count * step, 2)

# --- Main conversion function ---
# Keys left out when copying an input record into the merged output: the
# transcript and the raw model response are dropped, and "video_id" is
# re-inserted explicitly as the first key of each merged record.
_DROPPED_FIELDS = ("video_sponsorship_transcript", "model_response", "video_id")


def _merge_task_file(combined_data, full_path, converter, output_field):
    """Fold one task's result file into ``combined_data``, keyed by video_id.

    Args:
        combined_data: Dict mapping video_id to its merged record; mutated
            in place.
        full_path: Path of the JSON result file to read.
        converter: Callable applied to each item's ``flipped_score``.
        output_field: Key under which the converted score is stored.
    """
    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # open() falls back to the platform locale, which can mis-decode (or fail
    # on) non-ASCII text on some systems.
    with open(full_path, 'r', encoding='utf-8') as f:
        task_data = json.load(f)

    for item in task_data:
        video_id = item.get("video_id")
        raw_score = item.get("flipped_score")
        # Entries without an id or a score cannot be merged; skip them.
        if not video_id or raw_score is None:
            continue
        if video_id not in combined_data:
            # The base record is copied from the first task file that mentions
            # this video, so its pass-through fields (including that file's
            # "flipped_score") reflect that task only.
            combined_data[video_id] = {
                "video_id": video_id,
                **{k: v for k, v in item.items() if k not in _DROPPED_FIELDS},
            }
        # Add (or overwrite) the transformed score for this task.
        combined_data[video_id][output_field] = converter(raw_score)


def _combine_tasks(inference_dir, model, perspective, prompt, tasks):
    """Merge all task files for one perspective/prompt pair.

    Files are processed in ``tasks`` order. As soon as one expected file is
    missing, a warning is printed and ``None`` is returned so the caller can
    skip this combination entirely.

    Returns:
        Dict mapping video_id to merged record, or ``None`` if a file is
        missing.
    """
    combined_data = {}
    for task_prefix, converter, output_field in tasks.values():
        file_name = f"{model}_{task_prefix}_results_{perspective}_{prompt}.json"
        full_path = os.path.join(inference_dir, file_name)
        if not os.path.isfile(full_path):
            print(f"⚠️ Missing file: {full_path}")
            return None
        _merge_task_file(combined_data, full_path, converter, output_field)
    return combined_data


def convert_flat_files_to_webppl_compatible() -> None:
    """Merge per-task inference results into one JSON file per configuration.

    Scans the current working directory for directories matching
    ``youtube_inference_results_*``; the part after that prefix is used as the
    model name. For each such directory an output directory
    ``webppl_compatible_<directory>`` is created. Then, for every prompt
    ("direct", "cot") and perspective ("first", "user"), the three task files
    ``<model>_<task>_results_<perspective>_<prompt>.json`` are read and merged
    by ``video_id``:

    * ``offer_quality``  -> field ``value``  via ``rescale_offer``
    * ``reward_quality`` -> field ``val_vr`` via ``round_to_quartile``
    * ``trust``          -> field ``rec_vr`` via ``round_to_decile``

    The merged records are written as a JSON list to
    ``<output dir>/<model>_<perspective>_<prompt>.json``. If any of the three
    task files is missing, a warning is printed and no output is written for
    that prompt/perspective pair.

    Raises:
        OSError: If a file cannot be read or written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    inference_dirs = [d for d in glob.glob('youtube_inference_results_*') if os.path.isdir(d)]

    # task name -> (file-name prefix, score converter, output field name)
    tasks = {
        "offer": ("offer_quality", rescale_offer, "value"),
        "reward": ("reward_quality", round_to_quartile, "val_vr"),
        "trust": ("trust", round_to_decile, "rec_vr")
    }

    for inference_dir in inference_dirs:
        # The glob's "*" also matches an empty suffix; such a directory has no
        # model name and is skipped.
        model_match = re.search(r'youtube_inference_results_(.+)', inference_dir)
        if not model_match:
            continue
        model = model_match.group(1)

        output_base_dir = f"webppl_compatible_{inference_dir}"
        os.makedirs(output_base_dir, exist_ok=True)

        for prompt in ("direct", "cot"):
            for perspective in ("first", "user"):
                combined_data = _combine_tasks(inference_dir, model, perspective, prompt, tasks)
                if combined_data is None:
                    continue

                output_file = os.path.join(output_base_dir, f"{model}_{perspective}_{prompt}.json")
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(list(combined_data.values()), f, indent=2)

                print(f"✅ Created: {output_file}")

# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    convert_flat_files_to_webppl_compatible()
