import json

# System prompt shared by every generated conversation: it is stored verbatim
# as the "content" of the system message built in
# create_prompts_for_assignment_problems.
# The text covers, in order: the task statement (minimum-cost assignment on a
# bipartite graph), the input JSON format, the required output JSON format
# ("solution_list" and "solution_cost"), and two worked examples.
# NOTE: this literal is runtime data, so any edit to it (including whitespace)
# changes the prompt stored in every record. The backslash right after the
# opening quotes keeps the string from starting with a newline.
SYSTEM_PROMPT_WITH_EXAMPLES = """\
You are ChatGPT, a large language model trained by OpenAI. 
You are given an 'assignment problem' in which there is a bipartite graph. 
The left set (e.g., A, B, C...) might represent agents (drivers, employees, etc.), 
and the right set (e.g., #101, #102, etc.) might represent tasks (packages, jobs, etc.). 
Each edge weight is the cost or time of assigning that agent to that task.

Your task:
1. Read the input bipartite graph (in JSON format).
2. Compute a minimum-cost matching using the Hungarian algorithm (or equivalent).
3. Return the result in a specific JSON output format.

------------------
INPUT FORMAT:
- A JSON dictionary-of-dictionaries.
- Outer keys = Left nodes (e.g., "A", "B", "C", ...).
- Inner keys = Right nodes (e.g., "#101", "#102", "#103", ...).
- Values = cost (integer) of assigning left node to right node.

Example of a small bipartite graph:
{
  "A": { "#101": 2, "#102": 1 },
  "B": { "#101": 3, "#102": 2 }
}

------------------
OUTPUT FORMAT:
A JSON object with two keys:
1. "solution_list":  A list of tuples or arrays of the form 
    [
      ["LeftNode", "RightNode", cost],
      ["LeftNode", "RightNode", cost],
      ...
    ]
   representing the chosen matches (one per each left node).
2. "solution_cost":  An integer that is the sum of the chosen edges' costs.

------------------
DEMONSTRATION EXAMPLES:

Example 1:
--------------
Input Graph:
{
  "A": { "#101": 2, "#102": 1 },
  "B": { "#101": 3, "#102": 2 }
}
Output (One Possible Correct Answer):
{
  "solution_list": [
    ["A", "#102", 1],
    ["B", "#101", 3]
  ],
  "solution_cost": 4
}
Explanation:
- A → #102 has cost 1
- B → #101 has cost 3
Total cost = 1 + 3 = 4.
(Checking other assignments, e.g., A→#101 + B→#102 has cost 2+2=4, so the minimal cost is 4.)

Example 2:
--------------
Input Graph:
{
  "A": {"#101": 10, "#102": 2, "#103": 2},
  "B": {"#101": 1, "#102": 0, "#103": 8},
  "C": {"#101": 5, "#102": 5, "#103": 1}
}
One Possible Correct Solution:
{
  "solution_list": [
    ["A", "#102", 2],
    ["B", "#101", 1],
    ["C", "#103", 1]
  ],
  "solution_cost": 4
}
Explanation:
- A→#102 = 2
- B→#101 = 1
- C→#103 = 1
Total cost = 2 + 1 + 1 = 4.

------------------
Now, for each problem below, please output a JSON object with 
"solution_list" and "solution_cost". 
"""

def create_prompts_for_assignment_problems(json_file_path):
    """
    Build one ChatCompletion-style prompt record per assignment problem.

    The file at ``json_file_path`` is parsed as JSON and is expected to hold
    a list of problem objects, each providing the keys "problem_id", "graph",
    "solution_list" and "solution_cost".

    Args:
        json_file_path: Path of the JSON file holding the problems. The file
            is decoded as UTF-8 regardless of the platform's default
            encoding.

    Returns:
        A list of dictionaries, one per problem and in file order, each with:
          - "problem_id": the problem's identifier, copied as-is.
          - "message": a two-element list [system message, user message],
            each a {"role", "content"} dictionary. The system content is
            SYSTEM_PROMPT_WITH_EXAMPLES; the user content embeds the problem
            id and the graph rendered as indented JSON.
          - "solution_list": the problem's "solution_list", copied as-is.
          - "solution_cost": the problem's "solution_cost", copied as-is.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If a problem object lacks one of the required keys.
    """
    # JSON text is UTF-8; pin the encoding so non-ASCII content loads the
    # same way everywhere instead of depending on the locale's default.
    with open(json_file_path, "r", encoding="utf-8") as f:
        problems = json.load(f)

    all_questions = []

    for prob in problems:
        problem_id = prob["problem_id"]
        bipartite_graph = prob["graph"]
        solution_list = prob["solution_list"]
        solution_cost = prob["solution_cost"]

        # A fresh dict per record (rather than one shared instance) so that
        # mutating one record's system message never affects another.
        system_message = {
            "role": "system",
            "content": SYSTEM_PROMPT_WITH_EXAMPLES,
        }

        # The user turn carries the concrete graph and asks for the
        # minimal-cost assignment in the JSON format the system prompt defines.
        user_message = {
            "role": "user",
            "content": (
                f"Problem ID: {problem_id}\n\n"
                "Here is the bipartite graph in JSON:\n"
                f"{json.dumps(bipartite_graph, indent=2)}\n\n"
                "Please return the minimal assignment in the specified JSON format.\n"
            ),
        }

        all_questions.append(
            {
                "problem_id": problem_id,
                "message": [system_message, user_message],
                "solution_list": solution_list,   # ground-truth list of matches
                "solution_cost": solution_cost,   # ground-truth minimal cost
            }
        )

    return all_questions

def main():
    """
    Script entry point: build the prompt records and save them to disk.

    Loads the problems from 'assignment_problems.json' through
    create_prompts_for_assignment_problems, writes the resulting records
    (problem_id, message, solution_list, solution_cost) to
    'assignment_prompts.json' as indented JSON, and prints a one-line summary.
    """
    # Fixed input and output locations, relative to the working directory.
    source_path = "assignment_problems.json"
    target_path = "assignment_prompts.json"

    # One record per problem: system + user messages plus the ground truth.
    records = create_prompts_for_assignment_problems(source_path)

    with open(target_path, "w") as sink:
        json.dump(records, sink, indent=2)

    entry_count = len(records)
    print(f"Created '{target_path}' with {entry_count} prompt entries.")

# Run the driver only when this file is executed as a script; importing the
# module does not trigger any file I/O.
if __name__ == "__main__":
    main()
