import os
import json
import random
import logging
import hashlib
from datetime import datetime

# Configure the root logger: INFO and above, printing only the message text.
logging.basicConfig(level=logging.INFO, format="%(message)s")

# Constants
# Upper bound on the length of one assembled conversation: one opening
# message, up to MAX_MESSAGES - 3 middle messages, and two closing messages
# (see select_special_messages).
MAX_MESSAGES = 15
# Directories scanned by load_all_data(); the final path component of each
# entry is used as the category key in the loaded data.
PROMPT_DIRS = [
    "./prompts/diff_first_encounter_reasoning",
    "./prompts/caught_likelihood",
    "./prompts/countermeasures",
    "./prompts/pressure"
]

def load_all_data(prompt_dirs=None):
    """Load the message lists of every prompt JSON file, grouped by category.

    Args:
        prompt_dirs: Iterable of directory paths to scan. When omitted, the
            module-level ``PROMPT_DIRS`` list is used.

    Returns:
        A dict mapping each category (the final path component of a
        directory) to a list of ``{"file_name": ..., "messages": ...}``
        entries, one per ``.json`` file with a non-empty "messages" value.
        Files are listed in sorted name order.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    if prompt_dirs is None:
        prompt_dirs = PROMPT_DIRS

    all_data = {}
    for dir_path in prompt_dirs:
        # normpath drops a trailing separator, so "prompts/x/" still yields
        # the category "x" instead of an empty string.
        category = os.path.basename(os.path.normpath(dir_path))
        entries = all_data[category] = []

        # os.listdir returns names in arbitrary order; sorting keeps the
        # loaded data identical from run to run and across platforms.
        for file_name in sorted(os.listdir(dir_path)):
            if not file_name.endswith(".json"):
                continue

            file_path = os.path.join(dir_path, file_name)
            # Explicit UTF-8: the default encoding of open() depends on the
            # platform locale and can mis-decode non-ASCII prompt text.
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # A file whose top-level JSON value is not an object has no
            # "messages" key; skip it like a file with no messages.
            messages = data.get("messages", []) if isinstance(data, dict) else []
            if messages:
                entries.append({
                    "file_name": file_name,
                    "messages": messages
                })
    return all_data

def assemble_final_json(messages):
    """Wrap *messages* in the fixed request envelope.

    The returned dict carries constant model settings and empty
    "misalignment_string" / "canary" fields, with the given list stored
    (not copied) under "messages".
    """
    envelope = {}
    envelope["model"] = "gpt-4"
    envelope["temperature"] = 0.0
    envelope["max_tokens"] = 1024
    envelope["misalignment_string"] = ""
    envelope["canary"] = ""
    envelope["messages"] = messages
    return envelope

def select_message_at_index(all_data, index):
    """Pick one random message located at position *index* in any file.

    Every file in every category whose message list is longer than *index*
    contributes one candidate; one candidate is drawn with random.choice and
    the draw is logged. Returns the chosen message dict, or None when no
    file has a message at that position.
    """
    candidates = [
        (category, entry["file_name"], entry["messages"][index])
        for category, entries in all_data.items()
        for entry in entries
        if index < len(entry["messages"])
    ]
    if candidates:
        category, file_name, message = random.choice(candidates)
        logging.info(f"Selected message from category: {category}, file: {file_name}, line: {index}, role: {message['role']}")
        return message
    return None

def select_special_messages(all_data):
    """Assemble one conversation from randomly chosen prompt messages.

    The conversation is built in this order:
      1. message 0 of a random "countermeasures" file;
      2. for each index from 1 through MAX_MESSAGES - 3, one random message
         found at that index in any file (indices no file reaches are
         skipped);
      3. the second-to-last message of a random
         "diff_first_encounter_reasoning" file;
      4. the last message of a random "caught_likelihood" file.

    Every message whose role is "user" gets a "date" field holding today's
    date (YYYY-MM-DD).

    Args:
        all_data: Mapping of category name to a list of
            ``{"file_name": ..., "messages": [...]}`` entries.

    Returns:
        A list of message dicts. They are shallow copies, so stamping the
        date never modifies the dicts stored in *all_data*.

    Raises:
        KeyError: If a required category is missing or a message has no
            "role" key.
        IndexError: If a required category has no file with enough messages.
    """
    final_messages = []

    # First message (index 0 from countermeasures)
    counter_file = _choose_file(all_data, "countermeasures", 1)
    first_msg = counter_file["messages"][0]
    final_messages.append(first_msg)
    logging.info(f"Selected message from category: countermeasures, file: {counter_file['file_name']}, line: 0, role: {first_msg['role']}")
    logging.info("Added first message.")

    # Middle messages: indices 1 through MAX_MESSAGES - 3, leaving room for
    # the opening message and the two closing messages.
    middle_messages = []
    for i in range(1, MAX_MESSAGES - 3 + 1):
        msg = select_message_at_index(all_data, i)
        if msg:
            middle_messages.append(msg)
    logging.info(f"Added {len(middle_messages)} middle messages.")
    final_messages.extend(middle_messages)

    # Second-last message from diff_first_encounter_reasoning
    selected_file = _choose_file(all_data, "diff_first_encounter_reasoning", 2)
    second_last_msg = selected_file["messages"][-2]
    final_messages.append(second_last_msg)
    logging.info(f"Selected second-last message from category: diff_first_encounter_reasoning, file: {selected_file['file_name']}, line: {len(selected_file['messages']) - 2}, role: {second_last_msg['role']}")

    # Last message from caught_likelihood
    selected_file = _choose_file(all_data, "caught_likelihood", 1)
    last_msg = selected_file["messages"][-1]
    final_messages.append(last_msg)
    logging.info(f"Selected last message from category: caught_likelihood, file: {selected_file['file_name']}, line: {len(selected_file['messages']) - 1}, role: {last_msg['role']}")

    # Add 'date' field to user messages. Work on copies: the selected dicts
    # are the same objects held in all_data, and stamping them in place
    # would silently alter the loaded source data for every later call.
    today = datetime.now().strftime("%Y-%m-%d")
    stamped_messages = []
    for msg in final_messages:
        msg = dict(msg)
        if msg["role"] == "user":
            msg["date"] = today
        stamped_messages.append(msg)

    return stamped_messages


def _choose_file(all_data, category, min_length):
    """Pick a random file entry of *category* with at least *min_length* messages."""
    eligible = [f for f in all_data[category] if len(f["messages"]) >= min_length]
    if not eligible:
        # random.choice would raise a bare "empty sequence" IndexError here;
        # keep the exception type but say which category was the problem.
        raise IndexError(
            f"No file in category '{category}' has at least {min_length} message(s)."
        )
    return random.choice(eligible)

def hash_messages(messages):
    """Return an MD5 hex digest of *messages* for duplicate detection.

    The messages are serialized to JSON with sorted keys, so dicts that
    differ only in key order produce the same digest.
    """
    canonical = json.dumps(messages, sort_keys=True).encode("utf-8")
    digest = hashlib.md5()
    digest.update(canonical)
    return digest.hexdigest()

def create_multiple_unique_jsons(num_files=10):
    """Write up to *num_files* distinct assembled conversations to "adlib/".

    Conversations are drawn repeatedly; one whose message hash was already
    seen is discarded and redrawn. Each unique one is saved as
    adlib/final_assembled_<n>.json. The loop stops after num_files * 5
    attempts, and a warning is logged if fewer files than requested were
    produced.
    """
    all_data = load_all_data()
    os.makedirs("adlib", exist_ok=True)

    known_hashes = set()
    generated = 0
    attempts = 0
    # Cap on total draws so a small prompt pool cannot loop forever.
    max_attempts = num_files * 5

    while generated < num_files and attempts < max_attempts:
        attempts += 1
        logging.info(f"\n=== Attempt {attempts} ===")
        candidate = select_special_messages(all_data)
        fingerprint = hash_messages(candidate)

        if fingerprint not in known_hashes:
            known_hashes.add(fingerprint)
            final_output = assemble_final_json(candidate)

            output_path = f"adlib/final_assembled_{generated + 1}.json"
            with open(output_path, "w") as f:
                json.dump(final_output, f, indent=2)
            logging.info(f"✅ Saved unique JSON to {output_path}")

            generated += 1
        else:
            logging.info("Duplicate detected. Retrying...\n")

    if generated < num_files:
        logging.warning(f"Only {generated} unique files created out of {num_files} requested.")

# Script entry point: generate 150 unique JSON files.
# NOTE(review): this call is at module level, so it also runs whenever the
# file is imported; consider an `if __name__ == "__main__":` guard if this
# module is ever imported elsewhere.
create_multiple_unique_jsons(num_files=150)






