{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### IMPORTANT: Set your OpenAI key in the \"Specify your OpenAI key\" cell or this won't run!!!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note 1: If a cell fails to run, try rerunning it; that usually fixes it. (Failures usually happen because the agent gets stuck in a loop trying to find evidence for a difficult-to-support claim.) Wrapping the call in a try/except retry loop should make it work eventually.\n",
    "\n",
    "Note 2: Brew some coffee - The full notebook takes about 1 hour to run for an average round. You can see the debate happen in real time by scrolling down.\n",
    "\n",
    "Note 3: This is running with gpt-4.1-mini. It costs ~1-3 USD to simulate a full debate (run the full notebook) per run. Don't say I didn't warn you! This is a DEEP Debater!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Install Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install ducksearch openai ag2 tqdm agentops requests duckdb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download the OpenDebateEvidence bm25 index database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import requests\n",
    "from tqdm.notebook import tqdm\n",
    "from IPython.display import display, HTML\n",
    "import json\n",
    "import ast\n",
    "\n",
    "\n",
    "def download_file_from_hf(url, dest_path, chunk_size=1024*1024):\n",
    "    \"\"\"\n",
    "    Download a file from a URL to dest_path with a progress bar.\n",
    "\n",
    "    The download is skipped when dest_path already exists. Data is streamed to\n",
    "    a temporary file (dest_path plus a .part suffix) and only renamed to\n",
    "    dest_path after the transfer finished, so an interrupted or failed download\n",
    "    never leaves a truncated file that a later run would mistake for a complete one.\n",
    "\n",
    "    Args:\n",
    "        url: Direct download URL of the file.\n",
    "        dest_path: Local path the file is saved to.\n",
    "        chunk_size: Number of bytes requested per streamed chunk.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: If the server answers with a 4xx/5xx status.\n",
    "    \"\"\"\n",
    "    if os.path.exists(dest_path):\n",
    "        print(f\"{dest_path} already exists. Skipping download.\")\n",
    "        return\n",
    "    partial_path = f\"{dest_path}.part\"\n",
    "    try:\n",
    "        # The timeout makes a dead connection fail instead of hanging the cell forever.\n",
    "        with requests.get(url, stream=True, timeout=60) as response:\n",
    "            # Fail loudly instead of saving an HTML error page as the database file.\n",
    "            response.raise_for_status()\n",
    "            # A missing content-length becomes None so tqdm shows an open-ended bar.\n",
    "            total = int(response.headers.get('content-length', 0)) or None\n",
    "            with open(partial_path, 'wb') as file, tqdm(\n",
    "                desc=f\"Downloading {os.path.basename(dest_path)}\",\n",
    "                total=total,\n",
    "                unit='B',\n",
    "                unit_scale=True,\n",
    "                unit_divisor=1024,\n",
    "            ) as bar:\n",
    "                for data in response.iter_content(chunk_size=chunk_size):\n",
    "                    size = file.write(data)\n",
    "                    bar.update(size)\n",
    "        # Publish the file under its final name only once it is complete.\n",
    "        os.replace(partial_path, dest_path)\n",
    "    except BaseException:\n",
    "        # BaseException also covers a kernel interrupt: drop the partial file, then re-raise.\n",
    "        if os.path.exists(partial_path):\n",
    "            os.remove(partial_path)\n",
    "        raise\n",
    "    print(f\"Downloaded to {dest_path}\")\n",
    "\n",
    "db_url = \"https://huggingface.co/datasets/Hellisotherpeople/OpenDebateEvidenceBM25DuckDB/resolve/main/opendebateevidence_good.duckdb\"\n",
    "db_path = \"opendebateevidence_good.duckdb\"\n",
    "\n",
    "download_file_from_hf(db_url, db_path)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Function for retrieving the actual evidence given ID"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import duckdb\n",
    "\n",
    "\n",
    "def get_document_by_id(doc_id):\n",
    "    \"\"\"\n",
    "    Query the bm25_tables.documents table for a document by its id.\n",
    "\n",
    "    Args:\n",
    "        doc_id: Value matched against the id column.\n",
    "\n",
    "    Returns:\n",
    "        The matching row as a dictionary keyed by column name, or None if not found.\n",
    "    \"\"\"\n",
    "    con = duckdb.connect(\"opendebateevidence_good.duckdb\")\n",
    "    try:\n",
    "        query = \"SELECT * FROM bm25_tables.documents WHERE id = ?\"\n",
    "        result = con.execute(query, [doc_id]).fetchone()\n",
    "        if result is None:\n",
    "            return None\n",
    "        # Get column names for dict conversion\n",
    "        colnames = [desc[0] for desc in con.description]\n",
    "        return dict(zip(colnames, result))\n",
    "    finally:\n",
    "        # Always release the database handle, including on the not-found and error paths.\n",
    "        con.close()\n",
    "\n",
    "# Example usage:\n",
    "doc = get_document_by_id(1655472)\n",
    "from pprint import pprint\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Function for searching with BM25 and ducksearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ducksearch import search\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "\n",
    "def search_debate_cards(query: str) -> list:\n",
    "    \"\"\"\n",
    "    Search the debate card database using BM25 keyword search.\n",
    "\n",
    "    For best results with BM25 search:\n",
    "    - Use specific keywords rather than natural sentences\n",
    "    - Include key technical terms and proper nouns\n",
    "    - Avoid common words and stop words\n",
    "    - Order keywords from most to least important\n",
    "    - Do not search for specific dates (e.g., \"2020\", \"February 15, 2020\", etc.)\n",
    "    - Avoid searching for debate terms like \"advantage\", \"uniqueness\", \"cp\", \"solvency\", \"link\", \"impact\", etc. Instead, focus on the actual topic or content you want evidence about.\n",
    "    \n",
    "    Example good query: \"Biden foreign policy achievements NATO Ukraine\"\n",
    "    Example bad query: \"What are some good things Biden has done as president?\"\n",
    "    Bad query: \"uniqueness impact link advantage Biden\"\n",
    "    Bad query: \"immigration court reform 2020\"\n",
    "\n",
    "    Args:\n",
    "        query (str): The search query using keywords and operators.\n",
    "                     Example: \"Biden NATO expansion military aid\"\n",
    "\n",
    "    Returns:\n",
    "        list: Up to 10 of the best BM25 matches as dicts with the keys \"id\", \"tag\",\n",
    "              \"fullcite\" and \"markup\". The list is shuffled before it is returned,\n",
    "              so its order does NOT reflect the relevance score.\n",
    "\n",
    "    Side effects:\n",
    "        Renders the markup of the returned cards in a collapsible HTML section.\n",
    "    \"\"\"\n",
    "    import random\n",
    "\n",
    "    top_k = 10\n",
    "    display_markdown = True\n",
    "\n",
    "    result = search.documents(\n",
    "        database=\"opendebateevidence_good.duckdb\",\n",
    "        queries=[query],\n",
    "        top_k=top_k,\n",
    "        # filters=filters,\n",
    "        order_by=\"score DESC\",\n",
    "    )\n",
    "    # One hit list comes back per query and exactly one query was sent.\n",
    "    hits = result[0] if result else []\n",
    "    card_fields = (\"id\", \"tag\", \"fullcite\", \"markup\")\n",
    "    return_list = [{field: r.get(field) for field in card_fields} for r in hits]\n",
    "    # Deliberately discards the score ordering.\n",
    "    # NOTE(review): presumably to avoid position bias in the LLM that reads the cards - confirm.\n",
    "    random.shuffle(return_list)\n",
    "\n",
    "    # Display the markdown of each result using HTML\n",
    "    if display_markdown:\n",
    "        # Create a collapsible HTML section for the cards\n",
    "        collapsible_html = \"\"\"\n",
    "        <details style=\"margin-bottom:1em;\">\n",
    "          <summary style=\"font-size:1.1em; font-weight:bold; cursor:pointer;\">Show Search Results</summary>\n",
    "          <div id=\"debate-cards-collapsible\">\n",
    "        \"\"\"\n",
    "        for r in return_list:\n",
    "            if r.get(\"markup\"):\n",
    "                collapsible_html += r[\"markup\"] + \"<hr style='margin:1em 0;'>\"\n",
    "        collapsible_html += \"</div></details>\"\n",
    "        display(HTML(collapsible_html))\n",
    "\n",
    "    return return_list\n",
    "\n",
    "# Example usage: show one returned card (the list is shuffled, so this is an arbitrary hit).\n",
    "example_cards = search_debate_cards(\"Biden is a good president\")\n",
    "# Guard the empty case so a query without hits does not abort Run All with an IndexError.\n",
    "results = example_cards[0] if example_cards else None\n",
    "print(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Specify your OpenAI key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from autogen import ConversableAgent, LLMConfig, register_function\n",
    "from autogen import GroupChatManager\n",
    "from autogen import ConversableAgent, Agent\n",
    "from pydantic import BaseModel, Field\n",
    "from typing import List, Literal\n",
    "import agentops\n",
    "from autogen import GroupChat\n",
    "from pprint import pprint\n",
    "import json\n",
    "\n",
    "\n",
    "import os\n",
    "\n",
    "ENABLE_AGENTOPS = False  # Set to True to enable agentops.init\n",
    "\n",
    "if ENABLE_AGENTOPS:\n",
    "    # Taken from the AGENTOPS_API_KEY environment variable; replace the \"\" fallback to hard-code it.\n",
    "    agentops.init(api_key=os.environ.get(\"AGENTOPS_API_KEY\", \"\"))\n",
    "\n",
    "# Set your OpenAI API key here (replace the \"\" fallback), or export OPENAI_API_KEY before\n",
    "# starting Jupyter so the secret never has to be saved inside the notebook.\n",
    "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\", \"\")\n",
    "\n",
    "if not OPENAI_API_KEY:\n",
    "    print(\"WARNING: OPENAI_API_KEY is empty - the debate workflows below will fail until it is set.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify Debate Topic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "debate_topic = \"Resolved: The United States, Canada, and Mexico should form a North American Union similar to the European Union.\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Affirmative Workflows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plantext Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_plantext_for_topic(debate_topic: str) -> str:\n",
    "    \"\"\"\n",
    "    Encapsulates the plantext generation workflow and returns the plantext string for the given debate topic.\n",
    "\n",
    "    plantext_generator opens a group chat with the task message. After that the\n",
    "    speakers rotate debate_search_agent (forced to emit a tool call) ->\n",
    "    executor_agent -> plantext_reviewer (answers in the PlantextReview format).\n",
    "    The chat is ended after MAX_ITERATIONS reviews and the plantext is read\n",
    "    from the last message of the chat history.\n",
    "\n",
    "    Args:\n",
    "        debate_topic: The resolution a plantext is generated for.\n",
    "\n",
    "    Returns:\n",
    "        The \"plantext\" value parsed from the last message of the chat history.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last message is not valid JSON.\n",
    "        KeyError: If the parsed message has no \"plantext\" key.\n",
    "    \"\"\"\n",
    "    # Structured output for plantext generation and review\n",
    "    class PlantextReview(BaseModel):\n",
    "        plantext: str\n",
    "        rationale: str\n",
    "        advice_for_next_search: str\n",
    "\n",
    "    # Settings shared by all three configs below.\n",
    "    # NOTE(review): temperature=2 is unusually high and may be why cells sometimes\n",
    "    # need a rerun - confirm it is intended.\n",
    "    base_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None,\n",
    "        temperature=2,\n",
    "        top_p=0.9,\n",
    "    )\n",
    "    llm_config = LLMConfig(**base_llm_settings)\n",
    "    # The search agent must always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(**base_llm_settings, tool_choice=\"required\")\n",
    "    # The reviewer must answer in the PlantextReview format.\n",
    "    plan_eval_llm_config = LLMConfig(**base_llm_settings, response_format=PlantextReview)\n",
    "\n",
    "    # Agent that devises and iteratively refines a plantext based on evidence\n",
    "    plantext_generator = ConversableAgent(\n",
    "        name=\"plantext_generator\",\n",
    "        system_message=(\n",
    "            \"You are a policy debate expert tasked with generating a well-supported plantext for a given debate topic. \"\n",
    "            \"You must extensively review the debate evidence dataset, iteratively searching for evidence and revising your plantext. \"\n",
    "            \"Your plantext should be generic enough that it is likely to be well-supported by available evidence in inherency, links, impacts, and solvency, but as specific as possible otherwise.  \"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent that searches the debate evidence dataset\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag or query. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executor agent for running search tools\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent that reviews the plantext and determines if it is ready\n",
    "    plantext_reviewer = ConversableAgent(\n",
    "        name=\"plantext_reviewer\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach. Your job is to review the current plantext, the rationale, and the evidence gathered so far. \"\n",
    "            \"For each plantext, assess whether it is likely to be well-supported in inherency, links, impacts, and solvency, based on the evidence, for a wide variety of advantages and stock issues. \"\n",
    "            \"The plantext should always be written in the form: 'Plan: The <plan actor> should <do actions>'.\"\n",
    "            \"The plantext should try to use a specific actor (i.e. a specific branch of the federal government) and a specific action (i.e. a specific policy or program). That said, it should remain as generic as possible otherwise so that it's easier to gather evidence in support of it. \"\n",
    "            \"Give advice for the next search iteration for how to search for evidence to improve the plantext given the current plantext, evidence, and rationale. \"\n",
    "            \"After each search, update your plantext and provide rationale and advice for the next search. \"\n",
    "        ),\n",
    "        llm_config=plan_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # Register the search function (do not modify this part)\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    iterations = 0\n",
    "    MAX_ITERATIONS = 4\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"\n",
    "        Custom speaker selection for plantext generation and review.\n",
    "\n",
    "        Cycles search -> execute -> review and ends the chat (returns None) once\n",
    "        the reviewer has spoken MAX_ITERATIONS times. There is no early exit:\n",
    "        PlantextReview has no readiness flag the reviewer could set.\n",
    "        \"\"\"\n",
    "        nonlocal iterations\n",
    "\n",
    "        if len(groupchat.messages) == 0:\n",
    "            return plantext_generator\n",
    "\n",
    "        # If last was plantext_generator, search next\n",
    "        if last_speaker is plantext_generator:\n",
    "            return debate_search_agent\n",
    "\n",
    "        # If last was search, execute search\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        # If last was executor, review plantext and evidence\n",
    "        if last_speaker is executor_agent:\n",
    "            return plantext_reviewer\n",
    "\n",
    "        # If last was reviewer, stop after the final iteration or search again\n",
    "        if last_speaker is plantext_reviewer:\n",
    "            iterations += 1\n",
    "            if iterations >= MAX_ITERATIONS:\n",
    "                return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        # Default fallback\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[plantext_generator, debate_search_agent, executor_agent, plantext_reviewer],\n",
    "        messages=[],\n",
    "        max_round=50,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    chat_result = plantext_generator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=f\"Devise a generic plantext for the following debate topic, using iterative literature review: {debate_topic}\",\n",
    "        silent=True,\n",
    "    )\n",
    "\n",
    "    # The selector ends the chat right after a reviewer turn, so the last message\n",
    "    # is expected to be the reviewer's structured JSON answer.\n",
    "    plantext_output = json.loads(chat_result.chat_history[-1][\"content\"])[\"plantext\"]\n",
    "    return plantext_output\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#plantext_output = generate_plantext_for_topic(debate_topic)\n",
    "\n",
    "#print(plantext_output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Harms Workflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_plan_with_harm_evidence(plan_text: str, debate_topic: str) -> str:\n",
    "    \"\"\"\n",
    "    Find harms evidence for a plan and return an HTML fragment presenting it.\n",
    "\n",
    "    argument_evaluator opens a group chat and speaks first. After that the\n",
    "    speakers rotate debate_search_agent (forced to emit a tool call) ->\n",
    "    executor_agent -> debate_eval_agent (answers in the DebateCardSearchResult\n",
    "    format). The chat is ended once the evaluator has marked a card as\n",
    "    \"include_it\" three times, or when max_round is reached. The most recent\n",
    "    included card whose id exists in the evidence database is used for the output.\n",
    "\n",
    "    Args:\n",
    "        plan_text: The plantext the harms evidence has to support.\n",
    "        debate_topic: The resolution being debated.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string with the topic, the plan, the harm argument written by the\n",
    "        evaluator, and the stored markup of the selected card.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history contains an included card\n",
    "            whose id can be found in the evidence database.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Only the argument/tag, not the verbatim card\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Settings shared by all three configs below.\n",
    "    base_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    llm_config = LLMConfig(**base_llm_settings)\n",
    "    # The search agent must always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(**base_llm_settings, tool_choice=\"required\")\n",
    "    # The evaluator must answer in the DebateCardSearchResult format.\n",
    "    debate_eval_llm_config = LLMConfig(**base_llm_settings, response_format=DebateCardSearchResult)\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of harms to support a specific plan. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan into the exact harms or impacts that must be proven for the case to win.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically supports the plan's harms or impact claims.\\n\"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database (cutoff year 2022).\\n\"\n",
    "            \"   - Suggesting query refinements to maximize the chance of finding evidence that is both recent and directly supports the plan's harms.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Recency and timeliness (must be as recent as possible, ideally within the last 6 years).\\n\"\n",
    "            \"   - Direct, explicit support for the plan's harm or impact claim (evidence must not merely be tangentially related).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the harm is significant and relevant to the plan, not just a generic or background problem.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the plan's harms or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most plan-relevant harms evidence possible.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as harms evidence supporting a specific plan in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Recency (must be from within the last 6 years unless historically significant)\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the plan's harm or impact claim)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the plan's harms, not just generic support)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the harm is significant and directly relevant to the plan)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the impact for the plan)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False') any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates or closely mirrors already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the plan's impact link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly plan-relevant for harms\\n\\n\"\n",
    "            \"If you decide the card should be included, you MUST:\\n\"\n",
    "            \"- Retag the evidence with a new, precise long tag in the 'new_long_tag' variable. The new tag must explicitly pertain to a harm that the plan is trying to solve, making clear the specific impact or problem addressed by the plan.\\n\"\n",
    "            \"- Cut the evidence as a policy debater would: reproduce the EXACT source material (any failure here is strictly grounds for disqualification), but with newly modified highlighting and underlining to emphasize the most important parts for the plan's harms.\\n\"\n",
    "            \"- Under NO circumstances should you fail to fully and exactly transcribe the selected evidence in the 'new_markup_underlined_highlighted' variable. This must be a faithful, complete reproduction of the chosen text, with your new markup applied.\\n\"\n",
    "            \"- You must also generate a fully detailed, well-structured, and *long* argument in support of the card, explaining how and why this evidence proves the plan's harm. This argument should be written as though it will be orally presented to an audience in a debate round as the first card after the plantext—make it persuasive, logically sound, and tailored to the plan and topic. The argument you generate should be included in the 'retagged_argument_as_read_outloud_in_the_debate_round' field of the DebateCard. The argument must be long, thorough, and detailed, not a short tag or summary. \"\n",
    "            \"IMPORTANT: The 'retagged_argument_as_read_outloud_in_the_debate_round' field must ONLY contain the new argument/tag to be read out loud, NOT the verbatim card or evidence text. Do NOT include the full card or any evidence markup in this field—only the argument/tag, and ensure it is a long, detailed, and fully developed argument.\"\n",
    "            \"\\n\\nYour goal is to ensure we have the highest quality, plan-specific harms evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 4. Registers the tool with the agents, the description will be used by the LLM\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of times the evaluator has marked a card for inclusion. Kept local to\n",
    "    # this call so the counter neither leaks into nor depends on the notebook's\n",
    "    # global namespace.\n",
    "    iterations = 0\n",
    "\n",
    "    def _included_card(content):\n",
    "        \"\"\"Return the first card marked \"include_it\" in a JSON message body, or None.\"\"\"\n",
    "        if not isinstance(content, str):\n",
    "            return None\n",
    "        try:\n",
    "            cards = json.loads(content).get(\"cards\")\n",
    "        except (ValueError, AttributeError):\n",
    "            # Not JSON at all, or JSON that is not an object.\n",
    "            return None\n",
    "        if not isinstance(cards, list):\n",
    "            return None\n",
    "        for card in cards:\n",
    "            if isinstance(card, dict) and card.get(\"include_in_case\") == \"include_it\":\n",
    "                return card\n",
    "        return None\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Define a customized speaker selection function.\n",
    "\n",
    "        Transitions: argument_evaluator -> debate_search_agent -> executor_agent ->\n",
    "        debate_eval_agent -> debate_search_agent -> ...\n",
    "\n",
    "        Returns:\n",
    "            Return an `Agent` class or a string from ['auto', 'manual', 'random', 'round_robin'] to select a default method to use.\n",
    "            None is returned to end the chat after the third included card.\n",
    "        \"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # We'll start with a transition to the planner\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # Check the structured include_in_case field; a substring test on the raw\n",
    "            # text would also match the phrase inside the free-text fields.\n",
    "            if _included_card(messages[-1][\"content\"]) is not None:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations > 2:\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Include the debate topic as a separate string for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"Debate Topic: \\\"{debate_topic}\\\"\\n\"\n",
    "            f\"Plan: \\\"{plan_text}\\\"\\n\\n\"\n",
    "            \"Assume that the current year is 2022 for the purposes of the 6-year evidence recency requirement.\\n\"\n",
    "            \"Find the best, most recent, and most plan-specific evidence of harms supporting the plan above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the plan addresses a significant harm or impact. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not strictly relevant to the plan's harms. \"\n",
    "            \"When generating the retagged_argument_as_read_outloud_in_the_debate_round, ensure that the argument is long, detailed, and fully developed—at least several sentences or a full paragraph, not a short tag or summary.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Walk the transcript backwards and take the most recent included card whose id\n",
    "    # exists in the database. This also works when the chat stopped at max_round\n",
    "    # (the last message may then be a tool call) or when a card id does not exist.\n",
    "    harm_card = None\n",
    "    doc = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        candidate = _included_card(message.get(\"content\"))\n",
    "        if candidate is None:\n",
    "            continue\n",
    "        candidate_doc = get_document_by_id(candidate.get(\"id\"))\n",
    "        if candidate_doc is not None:\n",
    "            harm_card, doc = candidate, candidate_doc\n",
    "            break\n",
    "    if harm_card is None:\n",
    "        raise ValueError(\"No included harm card with an id present in the evidence database was found in the chat history.\")\n",
    "\n",
    "    # Only the argument/tag, not the verbatim card\n",
    "    harm_argument = harm_card.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "    # reason_to_include is not included in the output string per instructions\n",
    "\n",
    "    output_string = (\n",
    "        f\"<div>\"\n",
    "        f\"<h2>Debate Topic</h2>\"\n",
    "        f\"<p>{debate_topic}</p>\"\n",
    "        f\"<h2>Plan</h2>\"\n",
    "        f\"<p>{plan_text}</p>\"\n",
    "        f\"<h2>Harm Argument</h2>\"\n",
    "        f\"<p>{harm_argument}</p>\"\n",
    "        f\"<h2>Harm Evidence</h2>\"\n",
    "        f\"{doc['markup']}\"\n",
    "        f\"</div>\"\n",
    "    )\n",
    "    return output_string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = generate_plan_with_harm_evidence(plantext_output, debate_topic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inherency Workflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def append_inherency_argument_and_evidence(plan_string):\n",
    "    \"\"\"\n",
    "    Find one inherency card for the debate case and append it to the case HTML.\n",
    "\n",
    "    Runs a group chat in which argument_evaluator opens, then debate_search_agent,\n",
    "    executor_agent and debate_eval_agent take turns searching the debate evidence\n",
    "    dataset and judging the results. The chat stops as soon as a debate_eval_agent\n",
    "    reply contains \"include_it\", or when max_round is reached.\n",
    "\n",
    "    Args:\n",
    "        plan_string: Debate case built so far; sent verbatim as the opening context.\n",
    "\n",
    "    Returns:\n",
    "        plan_string, a newline, and a <div> with the retagged inherency argument\n",
    "        followed by the selected card's markup.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError, KeyError, IndexError or TypeError may propagate when the\n",
    "        last chat message is not the structured card result (e.g. the round limit was\n",
    "        hit before any card was accepted).\n",
    "    \"\"\"\n",
    "\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Settings shared by every agent in this workflow; the variants below add one option each.\n",
    "    shared_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None,\n",
    "    )\n",
    "    llm_config = LLMConfig(**shared_llm_settings)\n",
    "    # The search agent must always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(**shared_llm_settings, tool_choice=\"required\")\n",
    "    # The evaluator must answer with JSON matching DebateCardSearchResult.\n",
    "    debate_eval_llm_config = LLMConfig(**shared_llm_settings, response_format=DebateCardSearchResult)\n",
    "\n",
    "    # Plans the search: turns the plan and harm into inherency-focused queries.\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\",\n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of inherency to support a specific plan and its articulated harm. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and the chosen harm into the exact barriers, structural obstacles, or status quo failures that prevent the plan's harm from being solved under current policy.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the harm persists in the status quo and will not be solved absent the plan (i.e., inherency).\\n\"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database (cutoff year 2022).\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that is both recent and directly supports the inherency claim for the plan and harm.\\n\"\n",
    "            \"   - Do NOT select or search for any evidence that has already appeared in the debate case so far (including any evidence already included for harms or in previous sections). Exclude any such evidence from your search and selection process.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Recency and timeliness (must be as recent as possible, ideally within the last 6 years).\\n\"\n",
    "            \"   - Direct, explicit support for the inherency claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the harm is ongoing and not being solved by current policy, and that the plan is necessary to address it.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the inherency of the harm or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most plan- and harm-relevant inherency evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence or any evidence already present in the debate case.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Judges the retrieved cards and replies in the DebateCardSearchResult schema.\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as inherency evidence supporting a specific plan and harm in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Recency (must be from within the last 6 years unless historically significant)\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the plan's inherency claim for the articulated harm)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the inherency of the harm, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the harm is ongoing and not being solved by current policy)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the inherency link for the plan and harm)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Reject any evidence that has already appeared in the debate case so far (including any evidence used for harms or in previous sections). Do not allow any evidence to be included more than once in the debate case.\\n\"\n",
    "            \"4. Ensure terminology precisely matches what's needed for the plan's inherency link chains\\n\"\n",
    "            \"5. Only approve evidence that meets ALL evaluation criteria and is strictly plan- and harm-relevant for inherency\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, plan- and harm-specific inherency evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # Issues the search_debate_cards tool calls.\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards. \"\n",
    "            \"Do NOT search for or return any evidence that has already appeared in the debate case so far (including any evidence already included for harms or in previous sections). Exclude any such evidence from your search results.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executes the tool calls issued by debate_search_agent.\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route evaluator -> search -> executor -> eval, looping until a card is accepted.\"\"\"\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # Only the opening prompt exists so far: let the evaluator plan the search.\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # Returning None ends the chat: the first accepted card is the one we keep.\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=20,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Run the inherency workflow\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{plan_string}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Find the best, most recent, and most plan- and harm-specific evidence of inherency supporting the plan and the articulated harm above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the harm persists in the status quo and will not be solved absent the plan (i.e., inherency). \"\n",
    "            \"Reject any evidence that is generic, tangential, not relevant to the plan's inherency for the articulated harm. Reject using the harm evidence as the inherency evidence. \"\n",
    "            \"Do NOT select or include any evidence that has already appeared in the debate case so far (including any evidence already included for harms or in previous sections).\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The final message is expected to be the evaluator's structured JSON reply.\n",
    "    inherency_output_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    inherency_card = json.loads(inherency_output_raw_string)[\"cards\"][0]\n",
    "    inherency_output_id = inherency_card[\"id\"]\n",
    "    inherency_argument = inherency_card.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    # Look the card up by the id the evaluator returned to get its markup.\n",
    "    doc = get_document_by_id(inherency_output_id)\n",
    "\n",
    "    # Append inherency argument and evidence to plan_string with HTML tags\n",
    "    inherency_html = (\n",
    "        f\"<div>\"\n",
    "        f\"<h2>Inherency Argument</h2>\"\n",
    "        f\"<p>{inherency_argument}</p>\"\n",
    "        f\"<h2>Inherency Evidence</h2>\"\n",
    "        f\"{str(doc['markup'])}\"\n",
    "        f\"</div>\"\n",
    "    )\n",
    "    return plan_string + \"\\n\" + inherency_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = append_inherency_argument_and_evidence(debate_case)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advantages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_advantages(debate_case: str) -> list[str]:\n",
    "    \"\"\"\n",
    "    Ask a group chat of agents to propose three advantages for the debate case.\n",
    "\n",
    "    The chat cycles debate_search_agent -> executor_agent -> advantage_reviewer and\n",
    "    ends after MAX_ITERATIONS reviewer replies. The reviewer answers in the\n",
    "    Advantages schema, and its last reply is parsed as JSON.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Debate case built so far; embedded verbatim in the opening prompt.\n",
    "\n",
    "    Returns:\n",
    "        One str() rendering per entry of the reply's \"advantages\" list. These are\n",
    "        Python reprs of the parsed entries, not JSON strings.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError, KeyError or TypeError may propagate when the last chat\n",
    "        message is not the reviewer's structured reply.\n",
    "    \"\"\"\n",
    "\n",
    "    class Advantage(BaseModel):\n",
    "        title: str\n",
    "        core_argument: str\n",
    "\n",
    "    class Advantages(BaseModel):\n",
    "        advantages: List[Advantage] = Field(..., min_items=3, max_items=3)\n",
    "        rationale: str\n",
    "        advice_for_next_search: str\n",
    "\n",
    "    # Settings shared by every agent in this workflow; the variants below add one option each.\n",
    "    shared_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "    )\n",
    "    llm_config = LLMConfig(**shared_llm_settings)\n",
    "    # The search agent must always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(**shared_llm_settings, tool_choice=\"required\")\n",
    "    # The reviewer must answer with JSON matching Advantages.\n",
    "    adv_eval_llm_config = LLMConfig(**shared_llm_settings, response_format=Advantages)\n",
    "\n",
    "    advantage_generator = ConversableAgent(\n",
    "        name=\"advantage_generator\",\n",
    "        system_message=(\n",
    "            \"You are a policy debate expert tasked with generating 1-3 well-supported advantages for a given plantext, using the provided harms and inherency evidence. \"\n",
    "            \"You must extensively review the debate evidence dataset, iteratively searching for evidence and revising your advantages. \"\n",
    "            \"Each advantage should be clearly articulated, specific to the plantext, and directly supported by the harms and inherency evidence. \"\n",
    "            \"Advantages should be distinct, non-redundant, and phrased in a way that makes them easy to support with available evidence. \"\n",
    "            \"Importantly, you must follow policy debate tradition: each advantage should be highly unique and diverse, ideally representing a different traditional policy debate domain (e.g., one about the economy, another about the environment, another about geopolitics, public health, social justice, etc.). \"\n",
    "            \"Do not generate multiple advantages that are similar or overlap in impact area. Strive for maximum diversity and uniqueness in the types of advantages you generate, drawing on classic policy debate categories and creative, well-supported impacts.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Issues the search_debate_cards tool calls.\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag or query. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executes the tool calls issued by debate_search_agent.\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Produces the structured Advantages reply that this function returns.\n",
    "    advantage_reviewer = ConversableAgent(\n",
    "        name=\"advantage_reviewer\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach. Your job is to review the current set of advantages, the rationale, and the evidence gathered so far. \"\n",
    "            \"For each advantage, assess whether it is likely to be well-supported by the plantext, harms, and inherency evidence, and whether it is distinct and non-redundant. \"\n",
    "            \"Give advice for the next search iteration for how to search for evidence to improve the advantages given the current advantages, evidence, and rationale. \"\n",
    "            \"After each search, update your advantages and provide rationale and advice for the next search. \"\n",
    "            \"You may suggest rewording, combining, or splitting advantages as needed to maximize clarity and support. \"\n",
    "            \"Encourage the generation of highly unique and diverse advantages, each representing a different traditional policy debate domain (e.g., economy, environment, geopolitics, public health, social justice, etc.), and discourage overlap or redundancy in impact areas.\"\n",
    "        ),\n",
    "        llm_config=adv_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of reviewer replies seen so far, and how many are allowed before stopping.\n",
    "    iterations = 0\n",
    "    MAX_ITERATIONS = 1\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route generator -> search -> executor -> reviewer until MAX_ITERATIONS reviews.\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) == 0:\n",
    "            return advantage_generator\n",
    "\n",
    "        if last_speaker is advantage_generator:\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return advantage_reviewer\n",
    "\n",
    "        if last_speaker is advantage_reviewer:\n",
    "            # Each reviewer reply closes one search/review cycle; None ends the chat.\n",
    "            iterations += 1\n",
    "            if iterations >= MAX_ITERATIONS:\n",
    "                return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[advantage_generator, debate_search_agent, executor_agent, advantage_reviewer],\n",
    "        messages=[],\n",
    "        max_round=50,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # debate_case is not parsed here: the whole formatted string (plantext, harms,\n",
    "    # inherency evidence) is passed to the agents as context inside the prompt.\n",
    "    advantage_prompt = (\n",
    "        f\"Given the following debate case, generate 3 distinct, well-supported advantages. \"\n",
    "        f\"Do not generate multiple advantages that are similar or overlap in impact area. \"\n",
    "        f\"{debate_case}\\n\"\n",
    "        \"Use iterative literature review to ensure each advantage is specific, distinct, and well-supported. \"\n",
    "        \"Strive for maximum diversity and uniqueness in the types of advantages you generate, drawing on both classic policy debate categories and creative, well-supported impacts. \"\n",
    "        \"You are encouraged to generate highly progressive or novel arguments, including 'kritikal' (critical theory-based) advantages, but you should be just as likely to generate traditional, hardcore policy arguments. \"\n",
    "    )\n",
    "\n",
    "    chat_result = advantage_generator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=advantage_prompt,\n",
    "    )\n",
    "\n",
    "    # The final message is expected to be the reviewer's structured JSON reply.\n",
    "    advantage_output_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    advantages = json.loads(advantage_output_raw_string)[\"advantages\"]\n",
    "    # Return a list of string representations for each advantage\n",
    "    return [str(adv) for adv in advantages]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#advantages = generate_advantages(debate_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# advantage_1 = json.loads(json.dumps(ast.literal_eval(advantages[0])))\n",
    "# advantage_1_title = advantage_1[\"title\"]\n",
    "# advantage_1_core_argument = advantage_1[\"core_argument\"]\n",
    "\n",
    "# # Append advantage 1 title and core argument to debate_case using h2, div, and p tags\n",
    "# debate_case += (\n",
    "#     f\"\\n<h2>Advantage 1: {advantage_1_title}</h2>\"\n",
    "#     f\"\\n<div><p>{advantage_1_core_argument}</p></div>\"\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Advantage Uniqueness"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_advantage_uniqueness_to_case(debate_case, advantage_number: str):\n",
    "    \"\"\"\n",
    "    Find uniqueness evidence for one advantage and append it to the debate case.\n",
    "\n",
    "    Runs a group chat (argument_evaluator -> debate_search_agent -> executor_agent ->\n",
    "    debate_eval_agent) that keeps searching until debate_eval_agent has accepted\n",
    "    REQUIRED_ACCEPTANCES cards or max_round is reached. Only the most recently\n",
    "    accepted card is appended.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Debate case built so far; sent verbatim as the opening context.\n",
    "        advantage_number: Advantage to support, as a string such as \"1\" or \"2\"; it is\n",
    "            interpolated into the prompts and the appended heading.\n",
    "\n",
    "    Returns:\n",
    "        debate_case with an <h2> heading, the retagged argument as read out loud in the\n",
    "        round, and the card markup appended. The evaluator's reason is not included.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError, KeyError, IndexError or TypeError may propagate when no\n",
    "        accepted card exists and the last chat message is not a structured card result.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Settings shared by every agent in this workflow; the variants below add one option each.\n",
    "    shared_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None,\n",
    "    )\n",
    "    llm_config = LLMConfig(**shared_llm_settings)\n",
    "    # The search agent must always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(**shared_llm_settings, tool_choice=\"required\")\n",
    "    # The evaluator must answer with JSON matching DebateCardSearchResult.\n",
    "    debate_eval_llm_config = LLMConfig(**shared_llm_settings, response_format=DebateCardSearchResult)\n",
    "\n",
    "    # Agent setup\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\",\n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater focused on finding the best possible evidence of uniqueness for a specific advantage in a policy debate case. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Break down the plan, inherency, harm, and advantage {advantage_number} into their key components and causal relationships.\\n\"\n",
    "            f\"2. Guide evidence collection by:\\n\"\n",
    "            f\"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the current status quo regarding the impact area of advantage {advantage_number} (i.e., uniqueness). \"\n",
    "            f\"   - Using BM25 search to find relevant cards from a debate evidence database (cutoff year 2022).\\n\"\n",
    "            f\"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific uniqueness evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            f\"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the uniqueness claim for advantage {advantage_number}.\\n\"\n",
    "            f\"3. Evaluate evidence quality for:\\n\"\n",
    "            f\"   - Direct, explicit support for the uniqueness claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            f\"   - Specificity: The evidence must establish the current state of affairs in the impact area of advantage {advantage_number}, and explain why the impact is not already occurring or is not inevitable absent the plan.\\n\"\n",
    "            f\"   - Empirical support and authoritativeness.\\n\"\n",
    "            f\"Reject any evidence that does not fully and directly support the uniqueness of advantage {advantage_number} or that could be interpreted as generic or non-specific. \"\n",
    "            f\"Your goal is to find the strictest, most advantage- and plan-relevant uniqueness evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "            f\"\\n\\nIMPORTANT: Do NOT select or use any evidence (by cite, author, or content) that has already appeared in the current debate_case string below. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Judges the retrieved cards and replies in the DebateCardSearchResult schema.\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as uniqueness evidence supporting a specific advantage in policy debate. \"\n",
    "            f\"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            f\"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            f\"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the uniqueness claim for advantage {advantage_number})\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the advantage's uniqueness, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the current state of affairs in the impact area of advantage {advantage_number})\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the uniqueness link for advantage {advantage_number})\\n\\n\"\n",
    "            f\"After evaluating the evidence, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the advantage's uniqueness link chains\\n\"\n",
    "            f\"4. Only approve evidence that meets ALL evaluation criteria and is strictly advantage- and plan-relevant for uniqueness\\n\"\n",
    "            f\"5. IMPORTANT: Do NOT approve or select any evidence (by cite, author, or content) that has already appeared in the current debate_case string below. You must only approve new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, advantage- and plan-specific uniqueness evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # Issues the search_debate_cards tool calls.\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executes the tool calls issued by debate_search_agent.\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Substring that marks an accepting evaluator reply (the include_in_case value).\n",
    "    ACCEPT_MARKER = \"include_it\"\n",
    "    # The chat ends once the evaluator has accepted this many cards.\n",
    "    REQUIRED_ACCEPTANCES = 3\n",
    "    accepted_count = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route evaluator -> search -> executor -> eval, looping until enough acceptances.\"\"\"\n",
    "        nonlocal accepted_count\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # Only the opening prompt exists so far: let the evaluator plan the search.\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if ACCEPT_MARKER in messages[-1][\"content\"]:\n",
    "                accepted_count += 1\n",
    "                if accepted_count >= REQUIRED_ACCEPTANCES:\n",
    "                    # Returning None ends the chat.\n",
    "                    return None\n",
    "            # Rejected, or accepted but more acceptances are still required: search again.\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The opening message carries the whole debate case as context for the agents.\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            f\"Find the best, most plan- and advantage-specific evidence of uniqueness supporting advantage {advantage_number} articulated above. \"\n",
    "            f\"Only consider evidence that directly and specifically supports the claim that the impact area of advantage {advantage_number} is not already occurring, is not inevitable, or is not being solved in the status quo. \"\n",
    "            f\"Reject any evidence that is generic, tangential, or not relevant to the uniqueness of advantage {advantage_number}. \"\n",
    "            f\"\\n\\nIMPORTANT: Do NOT select or use any evidence (by cite, author, or content) that has already appeared in the current debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result. When the chat ends on an acceptance, the last message is the card\n",
    "    # to use. If max_round is reached first, the last message may be a tool call or a\n",
    "    # rejection, so walk backwards to the most recent reply carrying the acceptance marker.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        content = message.get(\"content\")\n",
    "        if not isinstance(content, str) or ACCEPT_MARKER not in content:\n",
    "            continue\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            continue\n",
    "        if isinstance(parsed, dict) and parsed.get(\"cards\"):\n",
    "            card_json = parsed[\"cards\"][0]\n",
    "            break\n",
    "    if card_json is None:\n",
    "        # No accepted card anywhere: keep the previous behaviour of parsing the last message.\n",
    "        advantage_uniqueness_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "        card_json = json.loads(advantage_uniqueness_raw_string)[\"cards\"][0]\n",
    "    advantage_uniqueness_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "    # Get the card document\n",
    "    advantage_uniqueness_doc = get_document_by_id(advantage_uniqueness_id)\n",
    "    card_markup = str(advantage_uniqueness_doc['markup'])\n",
    "\n",
    "    # Append to debate_case using h2, div, and p tags\n",
    "    debate_case += (\n",
    "        f\"\\n<h2>Advantage {advantage_number} Uniqueness</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return debate_case\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = add_advantage_uniqueness_to_case(debate_case, \"1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Advantage Link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_advantage_link_to_case(debate_case, advantage_number: str):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater focused on finding the best possible evidence of a causal link for advantage {advantage_number} in a policy debate case. \"\n",
    "            f\"The plantext, inherency, harm, advantage {advantage_number}, and the uniqueness evidence for advantage {advantage_number} have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            f\"1. Break down the plan, inherency, harm, advantage {advantage_number}, and the uniqueness evidence into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            f\"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the plan, if enacted, would cause the impact described in advantage {advantage_number} (i.e., link evidence). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific link evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the link claim for the advantage.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the link claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            f\"   - Specificity: The evidence must establish that the plan, as proposed, will cause the impact described in advantage {advantage_number}, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            f\"Reject any evidence that does not fully and directly support the link between the plan and the impact of advantage {advantage_number}, or that could be interpreted as generic or non-specific. \"\n",
    "            \"IMPORTANT: Do NOT select or use any evidence (by cite, author, or content) that has already appeared in the current debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\"\n",
    "            \" Your goal is to find the strictest, most advantage- and plan-relevant link evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as link evidence supporting advantage {advantage_number} in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the link claim for advantage {advantage_number})\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the advantage's link, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the plan, as proposed, will cause the impact described in advantage {advantage_number})\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the link for the advantage)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the advantage's link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly advantage- and plan-relevant for the link\\n\"\n",
    "            \"5. IMPORTANT: Do NOT select or approve any evidence (by cite, author, or content) that has already appeared in the current debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, advantage- and plan-specific link evidence for advantage {advantage_number}, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards. \"\n",
    "            \"IMPORTANT: Do NOT search for or select any evidence (by cite, author, or content) that has already appeared in the current debate_case string above. You must only search for and select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, inherency, harm, advantage N, and advantage N uniqueness evidence for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            f\"The inherency, harm, advantage {advantage_number}, and advantage {advantage_number} uniqueness evidence have already been established above.\\n\"\n",
    "            f\"Find the best and most plan- and advantage-specific evidence of a causal link supporting advantage {advantage_number} articulated above. \"\n",
    "            f\"Only consider evidence that directly and specifically supports the claim that the plan, if enacted, would cause the impact described in advantage {advantage_number}. \"\n",
    "            f\"Reject any evidence that is generic, tangential, or not relevant to the link between the plan and advantage {advantage_number}. \"\n",
    "            f\"\\n\\nIMPORTANT: Do NOT select or use any evidence (by cite, author, or content) that has already appeared in the current debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition.\"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    advantage_link_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    advantage_link_json = json.loads(advantage_link_raw_string)\n",
    "    card_json = advantage_link_json[\"cards\"][0]\n",
    "    advantage_link_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    advantage_link_doc = get_document_by_id(advantage_link_id)\n",
    "    card_markup = str(advantage_link_doc['markup'])\n",
    "\n",
    "    # Append to debate_case using h2, div, and p tags\n",
    "    debate_case += (\n",
    "        f\"\\n<h2>Advantage {advantage_number} Link</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return debate_case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = add_advantage_link_to_case(debate_case, \"1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Advantage Internal Link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_advantage_internal_link_to_case(debate_case, advantage_number: str):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater focused on finding the best possible evidence of an internal link for advantage {advantage_number} in a policy debate case. \"\n",
    "            f\"The plantext, inherency, harm, advantage {advantage_number}, the uniqueness evidence for advantage {advantage_number}, and the link evidence for advantage {advantage_number} have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            f\"1. Break down the plan, inherency, harm, advantage {advantage_number}, uniqueness evidence, and the link evidence into their key components and causal relationships, tracing the internal link chain to the impact.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            f\"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the internal link(s) between the link and the impact described in advantage {advantage_number} (i.e., internal link evidence). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific internal link evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the internal link claim for the advantage.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the internal link claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            f\"   - Specificity: The evidence must establish that the internal link(s) in the causal chain for advantage {advantage_number} are true, and explain the mechanism by which the link leads to the impact.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            f\"Reject any evidence that does not fully and directly support the internal link(s) between the link and the impact of advantage {advantage_number}, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most advantage- and plan-relevant internal link evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence in the case. \"\n",
    "            \"IMPORTANT: Do NOT select or search for any card (by id, cite, or content) that already appears in the current debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not search for it again.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as internal link evidence supporting advantage {advantage_number} in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the internal link claim for advantage {advantage_number})\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the advantage's internal link, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the internal link(s) in the causal chain for advantage {advantage_number} are true and relevant)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the internal link for the advantage)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards or any card that already appears in the current debate_case (by id, cite, or content). If a card has already appeared in the debate_case, do not include it and do not search for it again.\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the advantage's internal link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly advantage- and plan-relevant for the internal link\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, advantage- and plan-specific internal link evidence for advantage {advantage_number}, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards. \"\n",
    "            \"IMPORTANT: Do NOT search for or return any evidence (by id, cite, or content) that already appears in the current debate_case string above. Only search for and return new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not search for it again.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, inherency, harm, advantage N, advantage N uniqueness evidence, and link evidence for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            f\"The inherency, harm, advantage {advantage_number}, advantage {advantage_number} uniqueness evidence, and link evidence have already been established above.\\n\"\n",
    "            f\"Find the best and most plan- and advantage-specific evidence of an internal link supporting advantage {advantage_number} articulated above. \"\n",
    "            f\"Only consider evidence that directly and specifically supports the claim that the internal link(s) in the causal chain for advantage {advantage_number} are true and connect the link to the impact. \"\n",
    "            f\"Reject any evidence that is generic, tangential, not relevant to the internal link(s) between the link and the impact for advantage {advantage_number}, or that already appears in the current debate_case (by id, cite, or content). \"\n",
    "            f\"IMPORTANT: Do NOT select or search for any evidence (by id, cite, or content) that has already appeared in the debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not search for it again.\"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    advantage_internal_link_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    advantage_internal_link_json = json.loads(advantage_internal_link_raw_string)\n",
    "    card_json = advantage_internal_link_json[\"cards\"][0]\n",
    "    advantage_internal_link_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    advantage_internal_link_doc = get_document_by_id(advantage_internal_link_id)\n",
    "    card_markup = str(advantage_internal_link_doc['markup'])\n",
    "\n",
    "    # Append to debate_case using h2, div, and p tags\n",
    "    debate_case += (\n",
    "        f\"\\n<h2>Advantage {advantage_number} Internal Link</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return debate_case"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Advantage Impact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "    def add_advantage_impact_to_case(debate_case, advantage_number: str):\n",
    "        class DebateCard(BaseModel):\n",
    "            id: int\n",
    "            cite: str\n",
    "            include_in_case: Literal[\"include_it\", \"False\"]\n",
    "            reason_to_include: str\n",
    "            retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "        class DebateCardSearchResult(BaseModel):\n",
    "            cards: List[DebateCard] = Field(..., min_items=2, max_items=1)\n",
    "\n",
    "        llm_config = LLMConfig(\n",
    "            api_type=\"openai\",\n",
    "            model=\"gpt-4.1-mini\",\n",
    "            api_key=OPENAI_API_KEY,\n",
    "            temperature=2.0,\n",
    "            top_p=0.9,\n",
    "            parallel_tool_calls=None\n",
    "        )\n",
    "        required_llm_config = LLMConfig(\n",
    "            api_type=\"openai\",\n",
    "            model=\"gpt-4.1-mini\",\n",
    "            api_key=OPENAI_API_KEY,\n",
    "            tool_choice=\"required\",\n",
    "            temperature=2.0,\n",
    "            top_p=0.9,\n",
    "            parallel_tool_calls=None\n",
    "        )\n",
    "        debate_eval_llm_config = LLMConfig(\n",
    "            api_type=\"openai\",\n",
    "            model=\"gpt-4.1-mini\",\n",
    "            api_key=OPENAI_API_KEY,\n",
    "            response_format=DebateCardSearchResult,\n",
    "            temperature=2.0,\n",
    "            top_p=0.9,\n",
    "            parallel_tool_calls=None\n",
    "        )\n",
    "\n",
    "        argument_evaluator = ConversableAgent(\n",
    "            name=\"argument_evaluator\", \n",
    "            system_message=(\n",
    "                f\"You are an expert policy debater focused on finding the best possible evidence of the impact for advantage {advantage_number} in a policy debate case. \"\n",
    "                f\"The plantext, inherency, harm, advantage {advantage_number}, the uniqueness evidence for advantage {advantage_number}, and the link evidence for advantage {advantage_number} have already been provided. \"\n",
    "                \"Your job is to:\\n\"\n",
    "                f\"1. Break down the plan, inherency, harm, advantage {advantage_number}, uniqueness evidence, and the link evidence into their key components and causal relationships, tracing the full internal link chain to the impact.\\n\"\n",
    "                \"2. Guide evidence collection by:\\n\"\n",
    "                f\"   - Formulating extremely precise search queries that target evidence which demonstrates the most significant, maximized impact that could result from the link chain established for advantage {advantage_number} (i.e., impact evidence). \"\n",
    "                \"   - Strongly prefer evidence that supports the largest, most policy-relevant, and extreme impacts possible (such as nuclear war, extinction, existential risk, or catastrophic loss of value to life), even if the connection to the plan and link evidence is indirect or only loosely established. \"\n",
    "                \"   - It is acceptable to include evidence of extreme impacts (especially extinction-style impacts) even if the causal connection to the plan is indirect, as long as there is a plausible link chain. \"\n",
    "                \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "                \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific impact evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "                \"   - Suggest query refinements to maximize the chance of finding evidence that is high quality and supports the maximized impact claim for the advantage.\\n\"\n",
    "                \"3. Evaluate evidence quality for:\\n\"\n",
    "                \"   - Explicit support for the impact claim (evidence should not merely be generic background, but the connection to the plan can be indirect if the impact is extreme).\\n\"\n",
    "                f\"   - Specificity: The evidence should ideally establish that the plan, as proposed, will cause the maximized impact described in advantage {advantage_number}, but for extinction or similar extreme impacts, an indirect or plausible connection is sufficient.\\n\"\n",
    "                \"   - Empirical support and authoritativeness.\\n\"\n",
    "                \"   - Magnitude: Strongly prefer evidence that supports the largest, most significant impacts (e.g., extinction, nuclear war, existential risk, catastrophic loss of value to life), even if the impact is only indirectly connected to the advantage's link chain.\\n\"\n",
    "                \"Reject any evidence that is generic, non-specific, or completely disconnected from the established link chain, but be willing to accept indirect or plausible connections for extreme impacts. \"\n",
    "                \"IMPORTANT: Do NOT select or search for any evidence (by id, cite, or content) that has already appeared in the debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not search for it again. \"\n",
    "                \"Your goal is to find the strictest, most advantage- and plan-relevant impact evidence possible, maximizing the magnitude of the impact, and it is acceptable to include evidence of extinction or similar extreme impacts even if the connection to the plan is indirect, as long as it is not wholly implausible or generic. Ensure each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "            ),\n",
    "            llm_config=llm_config,\n",
    "        )\n",
    "\n",
    "        debate_eval_agent = ConversableAgent(\n",
    "            name=\"debate_eval_agent\",\n",
    "            system_message=(\n",
    "                \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "                \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as impact evidence supporting a specific advantage in policy debate. \"\n",
    "                \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "                \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "                \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "                f\"- Relevance (must support the maximized impact claim for advantage {advantage_number}, and be at least plausibly connected to the established link chain; for extinction or similar extreme impacts, an indirect or plausible connection is sufficient)\\n\"\n",
    "                \"- Strategic value (must provide unique and compelling support for the advantage's impact, not just generic background)\\n\"\n",
    "                f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and should ideally establish that the plan, as proposed, will cause the maximized impact described in advantage {advantage_number}; for extreme impacts, indirect or plausible connections are acceptable)\\n\"\n",
    "                \"- Magnitude (strongly prefer evidence that supports the largest, most significant impacts—such as extinction, nuclear war, existential risk, or catastrophic loss of value to life—even if those impacts are only indirectly or plausibly connected to the advantage's link chain)\\n\"\n",
    "                \"- Wording precision (must use exact terminology needed to establish the impact for the advantage)\\n\\n\"\n",
    "                \"After evaluating the evidence, you must:\\n\"\n",
    "                \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "                \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "                \"3. Ensure terminology precisely matches what's needed for the advantage's impact chain\\n\"\n",
    "                \"4. Only approve evidence that meets ALL evaluation criteria and is strictly advantage- and plan-relevant for the maximized impact, but for extinction or similar extreme impacts, an indirect or plausible connection is sufficient\\n\\n\"\n",
    "                \"IMPORTANT: Do NOT select or approve any evidence (by id, cite, or content) that has already appeared in the debate_case string above. You must only approve new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not approve it again.\\n\"\n",
    "                \"Your goal is to ensure we have the highest quality, advantage- and plan-specific impact evidence, maximizing the magnitude of the impact while ensuring absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup. For extinction or similar extreme impacts, err on the side of inclusion even if the connection to the plan is indirect, as long as it is not wholly implausible or generic.\"\n",
    "            ),\n",
    "            llm_config=debate_eval_llm_config,\n",
    "        )\n",
    "\n",
    "        debate_search_agent = ConversableAgent(\n",
    "            name=\"debate_search_agent\",\n",
    "            system_message=(\n",
    "                \"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards. \"\n",
    "                \"IMPORTANT: Do NOT search for or return any evidence (by id, cite, or content) that has already appeared in the debate_case string above. You must only search for and return new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not search for it again and do not return it.\"\n",
    "            ),\n",
    "            llm_config=required_llm_config,\n",
    "        )\n",
    "\n",
    "        executor_agent = ConversableAgent(\n",
    "            name=\"executor_agent\",\n",
    "            human_input_mode=\"NEVER\",\n",
    "            llm_config=llm_config,\n",
    "        )\n",
    "\n",
    "        register_function(\n",
    "            search_debate_cards,\n",
    "            caller=debate_search_agent,\n",
    "            executor=executor_agent,\n",
    "            description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "        )\n",
    "\n",
    "        from autogen import GroupChat\n",
    "\n",
    "        iterations = 0\n",
    "\n",
    "        allowed_transitions = {\n",
    "            argument_evaluator: [debate_search_agent],\n",
    "            debate_search_agent: [executor_agent],\n",
    "            executor_agent: [argument_evaluator]\n",
    "        } # Not being used but a good example\n",
    "\n",
    "        def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "            nonlocal iterations\n",
    "            messages = groupchat.messages\n",
    "\n",
    "            if len(messages) <= 1:\n",
    "                return argument_evaluator\n",
    "\n",
    "            if last_speaker is debate_search_agent:\n",
    "                return executor_agent\n",
    "\n",
    "            if last_speaker is executor_agent:\n",
    "                return debate_eval_agent\n",
    "            \n",
    "            if last_speaker is debate_eval_agent:\n",
    "                if \"include_it\" in messages[-1][\"content\"]:\n",
    "                    iterations += 1\n",
    "                    print(f\"iterations: {iterations}\")\n",
    "                    if iterations >= 1:\n",
    "                        return None\n",
    "                    else:\n",
    "                        return debate_search_agent\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "\n",
    "            if last_speaker is argument_evaluator:\n",
    "                return debate_search_agent\n",
    "            else:\n",
    "                return \"round_robin\"\n",
    "\n",
    "        group_chat = GroupChat(\n",
    "            agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "            messages=[],\n",
    "            max_round=40,\n",
    "            speaker_selection_method=custom_speaker_selection_func\n",
    "        )\n",
    "\n",
    "        group_chat_manager = GroupChatManager(\n",
    "            groupchat=group_chat,\n",
    "            llm_config=llm_config,\n",
    "        )\n",
    "\n",
    "        # Use the debate_case, which includes the debate topic, the plan, inherency, harm, advantage N, advantage N uniqueness evidence, and advantage N link evidence for context\n",
    "\n",
    "        chat_result = argument_evaluator.initiate_chat(\n",
    "            group_chat_manager,\n",
    "            message=(\n",
    "                f\"{debate_case}\\n\\n\"\n",
    "                \"Assume that the current year is 2022.\\n\"\n",
    "                f\"The inherency, harm, advantage {advantage_number}, advantage {advantage_number} uniqueness evidence, and advantage {advantage_number} link evidence have already been established above.\\n\"\n",
    "                f\"Find the best, most plan- and advantage-specific evidence of the maximized impact supporting advantage {advantage_number} articulated above. \"\n",
    "                \"Strongly prefer evidence that supports the largest, most significant, and extreme impacts (such as nuclear war, extinction, existential risk, or catastrophic loss of value to life), even if those impacts are only indirectly or plausibly connected to the advantage's link chain. \"\n",
    "                \"It is acceptable to include evidence of extreme impacts even if the connection to the plan is indirect, as long as it is not wholly implausible or generic. \"\n",
    "                f\"Only consider evidence that supports the claim that the plan, if enacted, would cause the maximized impact described in advantage {advantage_number}, but for extinction or similar extreme impacts, an indirect or plausible connection is sufficient. \"\n",
    "                f\"Reject any evidence that is generic, completely tangential, or wholly disconnected from the established link chain for advantage {advantage_number}. \"\n",
    "                \"IMPORTANT: Do NOT select or search for any evidence (by id, cite, or content) that has already appeared in the debate_case string above. You must only select new, previously unused evidence. Carefully check the debate_case string for all previously included evidence and avoid any repetition. If a card has already appeared in the debate_case, do not include it and do not search for it again.\"\n",
    "            ),\n",
    "        )\n",
    "\n",
    "        advantage_impact_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "        advantage_impact_json = json.loads(advantage_impact_raw_string)\n",
    "        card_json = advantage_impact_json[\"cards\"][0]\n",
    "        advantage_impact_id = card_json[\"id\"]\n",
    "        retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "        advantage_impact_doc = get_document_by_id(advantage_impact_id)\n",
    "        card_markup = str(advantage_impact_doc['markup'])\n",
    "\n",
    "        # Append to debate_case using h2, div, and p tags\n",
    "        debate_case += (\n",
    "            f\"\\n<h2>Advantage {advantage_number} Impact</h2>\"\n",
    "            f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "            f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "        )\n",
    "        return debate_case"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = add_advantage_impact_to_case(debate_case, \"1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import json\n",
    "# import ast\n",
    "\n",
    "# advantage_2 = json.loads(json.dumps(ast.literal_eval(advantages[1])))\n",
    "# advantage_2_title = advantage_2[\"title\"]\n",
    "# advantage_2_core_argument = advantage_2[\"core_argument\"]\n",
    "\n",
    "# # Append advantage 2 title and core argument to debate_case using h2, div, and p tags\n",
    "# debate_case += (\n",
    "#     f\"\\n<h2>Advantage 2: {advantage_2_title}</h2>\"\n",
    "#     f\"\\n<div><p>{advantage_2_core_argument}</p></div>\"\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# debate_case = add_advantage_uniqueness_to_case(debate_case, \"2\")\n",
    "# debate_case = add_advantage_link_to_case(debate_case, \"2\")\n",
    "# debate_case = add_advantage_impact_to_case(debate_case, \"2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import json\n",
    "# import ast\n",
    "\n",
    "# advantage_3 = json.loads(json.dumps(ast.literal_eval(advantages[2])))\n",
    "# advantage_3_title = advantage_3[\"title\"]\n",
    "# advantage_3_core_argument = advantage_3[\"core_argument\"]\n",
    "\n",
    "# # Append advantage 3 title and core argument to debate_case using h2, div, and p tags\n",
    "# debate_case += (\n",
    "#     f\"\\n<h2>Advantage 3: {advantage_3_title}</h2>\"\n",
    "#     f\"\\n<div><p>{advantage_3_core_argument}</p></div>\"\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# debate_case = add_advantage_uniqueness_to_case(debate_case, \"3\")\n",
    "# debate_case = add_advantage_link_to_case(debate_case, \"3\")\n",
    "# debate_case = add_advantage_impact_to_case(debate_case, \"3\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Solvency Workflow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_solvency_card_to_case(debate_case):\n",
    "    \"\"\"\n",
    "    Runs the agent group chat to find the best solvency card, and appends the retagged argument and card markup to the debate_case string.\n",
    "    Returns the updated debate_case.\n",
    "    \"\"\"\n",
    "    # --- Agent and config setup (relaxed recency requirements) ---\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Relaxed recency requirements in system messages\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of plan solvency to support a specific plan and its articulated harm. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and the chosen harm into the exact mechanisms, actions, and causal processes by which the plan would solve or significantly reduce the harm.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the plan, if implemented, would solve or substantially mitigate the articulated harm (i.e., solvency).\\n\"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the solvency claim for the plan and harm.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the solvency claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the plan, as proposed, will solve or significantly reduce the harm, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the solvency of the plan for the articulated harm or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most plan- and harm-relevant solvency evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "            \"\\n\\nIMPORTANT: Do NOT select or search for any evidence that has already appeared in the current debate case. If a card or evidence is already present in the debate_case, it must not be included or searched for again.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as solvency evidence supporting a specific plan and harm in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the plan's solvency claim for the articulated harm)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the plan's ability to solve the harm, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the plan, as proposed, will solve or significantly reduce the harm)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the solvency link for the plan and harm)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the plan's solvency link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly plan- and harm-relevant for solvency\\n\"\n",
    "            \"5. IMPORTANT: Do NOT approve or allow any evidence that has already appeared in the current debate case. If a card or evidence is already present in the debate_case, it must not be included or searched for again.\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, plan- and harm-specific solvency evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards. \"\n",
    "            \"IMPORTANT: Do NOT search for or return any evidence that has already appeared in the current debate case. If a card or evidence is already present in the debate_case, it must not be included or searched for again.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        global iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Prompt: recency requirement removed, and now also instructs not to select or search for evidence already in the debate_case\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            \"Find the best, most plan- and harm-specific evidence of solvency supporting the plan and the articulated harm above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the plan, if implemented, will solve or substantially reduce the harm (i.e., solvency). \"\n",
    "            \"Reject any evidence that is generic, tangential, not relevant to the plan's solvency for the articulated harm. \"\n",
    "            \"IMPORTANT: Do NOT select, search for, or include any evidence that has already appeared in the current debate case. If a card or evidence is already present in the debate_case, it must not be included or searched for again.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    import json\n",
    "    solvency_output_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    card_data = json.loads(solvency_output_raw_string)[\"cards\"][0]\n",
    "    solvency_output_id = card_data[\"id\"]\n",
    "    retagged_argument = card_data[\"retagged_argument_as_read_outloud_in_the_debate_round\"]\n",
    "\n",
    "    doc = get_document_by_id(solvency_output_id)\n",
    "\n",
    "    # Append to debate_case using h2, div, and <p> tags\n",
    "    debate_case += (\n",
    "        f\"\\n<h2>Solvency Argument</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<h2>Solvency Card</h2>\"\n",
    "        f\"\\n<div>{doc['markup']}</div>\"\n",
    "    )\n",
    "\n",
    "    return debate_case\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#debate_case = add_solvency_card_to_case(debate_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1AC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
    "def try_n_times(func, *args, n=3, **kwargs):\n",
    "    last_exception = None\n",
    "    for attempt in range(n):\n",
    "        try:\n",
    "            return func(*args, **kwargs)\n",
    "        except Exception as e:\n",
    "            last_exception = e\n",
    "            if attempt == n - 1:\n",
    "                raise\n",
    "    if last_exception:\n",
    "        raise last_exception\n",
    "\n",
    "plantext_output = try_n_times(generate_plantext_for_topic, debate_topic)\n",
    "debate_case = try_n_times(generate_plan_with_harm_evidence, plantext_output, debate_topic)\n",
    "debate_case = try_n_times(append_inherency_argument_and_evidence, debate_case)\n",
    "advantages = try_n_times(generate_advantages, debate_case)\n",
    "\n",
    "for i in range(3):\n",
    "    # Try to parse the advantage up to 3 times\n",
    "    for attempt in range(3):\n",
    "        try:\n",
    "            advantage = json.loads(json.dumps(ast.literal_eval(advantages[i])))\n",
    "            break\n",
    "        except Exception as e:\n",
    "            if attempt == 2:\n",
    "                raise\n",
    "    advantage_title = advantage[\"title\"]\n",
    "    advantage_core_argument = advantage[\"core_argument\"]\n",
    "\n",
    "    # Append advantage title and core argument to debate_case using h2, div, and p tags\n",
    "    debate_case += (\n",
    "        f\"\\n<h2>Advantage {i+1}: {advantage_title}</h2>\"\n",
    "        f\"\\n<div><p>{advantage_core_argument}</p></div>\"\n",
    "    )\n",
    "\n",
    "    debate_case = try_n_times(add_advantage_uniqueness_to_case, debate_case, str(i+1))\n",
    "    debate_case = try_n_times(add_advantage_link_to_case, debate_case, str(i+1))\n",
    "    debate_case = try_n_times(add_advantage_internal_link_to_case, debate_case, str(i+1))\n",
    "    debate_case = try_n_times(add_advantage_impact_to_case, debate_case, str(i+1))\n",
    "\n",
    "debate_case = try_n_times(add_solvency_card_to_case, debate_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "debate_case"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cross Examination (of the 1AC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate_1nc_cross_examination(debate_case: str) -> str:\n",
    "    \"\"\"\n",
    "    Simulate the negative's cross-examination of the 1AC and render it as HTML.\n",
    "\n",
    "    A negative questioner and an affirmative respondent alternate in a group chat;\n",
    "    a summary agent then emits the exchange as structured JSON, which is turned\n",
    "    into an HTML fragment.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative case (1AC) given to the agents as context.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string: an h2 heading followed by numbered question/answer divs.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "        KeyError: If the parsed summary lacks the expected keys.\n",
    "    \"\"\"\n",
    "    from typing import List\n",
    "    from pydantic import BaseModel, Field\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of question/answer exchanges. The schema and the speaker-selection loop\n",
    "    # both use this value so the summary never has to invent or drop a pair.\n",
    "    # (The agent prompts below still mention a 3-to-7 range; the loop is what decides.)\n",
    "    NUM_CROSS_EX_QUESTIONS = 4\n",
    "\n",
    "    # Structured output of the summary agent. These models deliberately carry no\n",
    "    # docstrings so the JSON schema derived from them stays unchanged.\n",
    "    class CrossExPair(BaseModel):\n",
    "        negative_question: str\n",
    "        affirmative_response: str\n",
    "\n",
    "    class CrossExamination(BaseModel):\n",
    "        cross_ex: List[CrossExPair] = Field(\n",
    "            ..., min_items=NUM_CROSS_EX_QUESTIONS, max_items=NUM_CROSS_EX_QUESTIONS\n",
    "        )\n",
    "\n",
    "    # LLM config for all agents\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Same model, but the reply is constrained to the CrossExamination schema\n",
    "    cross_ex_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=CrossExamination,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent 1: Negative (1NC) asks questions\n",
    "    negative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"negative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 1NC (negative) debater in a policy debate cross-examination. \"\n",
    "            \"Your job is to ask sharp, strategic, and challenging questions about the 1AC (affirmative case) just presented. \"\n",
    "            \"Focus on exposing weaknesses, ambiguities, or assumptions in the plan, inherency, harms, advantages, and solvency. \"\n",
    "            \"Ask one question at a time, and wait for the affirmative to answer before asking the next. \"\n",
    "            \"Do not answer your own questions. \"\n",
    "            \"Be concise and direct. \"\n",
    "            \"Do not repeat questions. \"\n",
    "            \"You will ask a total of 3 to 7 questions.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 2: Affirmative answers\n",
    "    affirmative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"affirmative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 1AC (affirmative) debater being cross-examined by the 1NC (negative) in a policy debate. \"\n",
    "            \"Your job is to answer each question as clearly, persuasively, and strategically as possible, defending the affirmative case. \"\n",
    "            \"Respond directly to the negative's question, but do not volunteer extra information. \"\n",
    "            \"Be concise and avoid rambling. \"\n",
    "            \"Do not ask questions yourself.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 3: Cross-ex summary agent (outputs the structured Q&A)\n",
    "    cross_ex_summary_agent = ConversableAgent(\n",
    "        name=\"cross_ex_summary_agent\",\n",
    "        system_message=(\n",
    "            \"You are a debate judge summarizing the 1NC cross-examination of the 1AC. \"\n",
    "            \"Your job is to produce a structured list of question/answer pairs, each with a 'negative_question' and an 'affirmative_response', \"\n",
    "            \"covering the full cross-examination as it occurred. \"\n",
    "            \"Return the result as a list of 3 to 7 question/answer pairs, each clearly labeled.\"\n",
    "        ),\n",
    "        llm_config=cross_ex_llm_config,\n",
    "    )\n",
    "\n",
    "    completed_exchanges = 0  # Question/answer pairs finished so far\n",
    "\n",
    "    def cross_ex_speaker_selection(last_speaker, groupchat):\n",
    "        \"\"\"Alternate negative/affirmative, then hand over to the summary agent.\"\"\"\n",
    "        nonlocal completed_exchanges\n",
    "        # While the opening prompt is the only message, nothing has been asked yet.\n",
    "        # That prompt is sent by affirmative_cross_ex_agent (it calls initiate_chat),\n",
    "        # so it must not be counted as an answer; give the floor to the negative.\n",
    "        if last_speaker is None or len(groupchat.messages) <= 1:\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is negative_cross_ex_agent:\n",
    "            return affirmative_cross_ex_agent\n",
    "        if last_speaker is affirmative_cross_ex_agent:\n",
    "            completed_exchanges += 1\n",
    "            if completed_exchanges >= NUM_CROSS_EX_QUESTIONS:\n",
    "                return cross_ex_summary_agent\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is cross_ex_summary_agent:\n",
    "            # Returning None ends the group chat after the summary\n",
    "            return None\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[negative_cross_ex_agent, affirmative_cross_ex_agent, cross_ex_summary_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=cross_ex_speaker_selection\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The context for the cross-examination is the debate_case (the 1AC)\n",
    "    chat_result = affirmative_cross_ex_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"You are about to begin the 1NC cross-examination of the 1AC. The 1AC is as follows:\\n\\n\"\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            \"Begin by asking your first question.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The summary agent's output is the last message in the chat history\n",
    "    cross_ex_json = chat_result.chat_history[-1][\"content\"]\n",
    "    cross_ex_data = json.loads(cross_ex_json)\n",
    "    cross_ex_pairs = cross_ex_data[\"cross_ex\"]\n",
    "\n",
    "    # Format as HTML for display\n",
    "    html = \"<h2>1NC Cross-Examination of the 1AC</h2>\\n\"\n",
    "    for i, pair in enumerate(cross_ex_pairs, 1):\n",
    "        html += f\"<div><b>Negative Question {i}:</b> {pair['negative_question']}</div>\\n\"\n",
    "        html += f\"<div><b>Affirmative Response {i}:</b> {pair['affirmative_response']}</div>\\n\"\n",
    "        html += \"<br/>\\n\"\n",
    "\n",
    "    return html\n",
    "\n",
    "# The rest of the notebook stores the returned HTML in first_cx_transcript\n",
    "# Usage: first_cx_transcript = simulate_1nc_cross_examination(debate_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Retry the cross-examination a few times: a run can fail transiently, e.g. when\n",
    "# the final chat message is not the structured JSON the function expects.\n",
    "max_attempts = 3\n",
    "for attempt in range(1, max_attempts + 1):\n",
    "    try:\n",
    "        first_cx_transcript = simulate_1nc_cross_examination(debate_case)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        # Report every failed attempt instead of swallowing it silently\n",
    "        print(f\"Cross-examination attempt {attempt}/{max_attempts} failed: {e!r}\")\n",
    "        if attempt == max_attempts:\n",
    "            raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the transcript only if it is not already present, so re-running this\n",
    "# cell does not duplicate the cross-examination inside debate_case.\n",
    "if first_cx_transcript not in debate_case:\n",
    "    debate_case = debate_case + first_cx_transcript"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the case again, now that the cross-examination transcript has been appended\n",
    "display(HTML(debate_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Negative Workflows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_negative_offcase(affirmative_case: str) -> dict:\n",
    "    \"\"\"\n",
    "    Generate the negative's off-case positions for an affirmative case.\n",
    "\n",
    "    A group chat cycles search agent -> executor -> reviewer for MAX_ITERATIONS\n",
    "    reviewer turns. The reviewer replies in the NegativePositions schema, and its\n",
    "    last reply is parsed and returned.\n",
    "\n",
    "    Args:\n",
    "        affirmative_case: The affirmative case text embedded in the opening prompt.\n",
    "\n",
    "    Returns:\n",
    "        The parsed JSON of the last chat message (expected to follow NegativePositions).\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "    \"\"\"\n",
    "    from typing import List\n",
    "    from pydantic import BaseModel, Field\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Docstrings on the models below are kept verbatim: they are part of the schema\n",
    "    # description of the structured output, not just documentation.\n",
    "    class Topicality(BaseModel):\n",
    "        title: str\n",
    "        core_argument_summary_as_spoken_outloud_in_debate_round: str\n",
    "\n",
    "    class Theory(BaseModel):\n",
    "        \"\"\"\n",
    "        Debate theory argument (e.g., conditionality, severance, intrinsicness, etc.),\n",
    "        not a general philosophical theory. This should be a procedural or theoretical\n",
    "        objection to the structure or practices of the affirmative or negative, such as\n",
    "        conditionality, multiple conditional counterplans, plan-inclusive counterplans, etc.\n",
    "        All theory arguments must be designed to help the negative win the debate round and\n",
    "        should directly oppose or undermine the plan or the affirmative's advocacy.\n",
    "        \"\"\"\n",
    "        title: str\n",
    "        core_argument_summary_as_spoken_outloud_in_debate_round: str\n",
    "\n",
    "    class Disadvantage(BaseModel):\n",
    "        title: str\n",
    "        core_argument_summary_as_spoken_outloud_in_debate_round: str\n",
    "\n",
    "    class Counterplan(BaseModel):\n",
    "        title: str\n",
    "        core_argument_summary_as_spoken_outloud_in_debate_round: str\n",
    "        counterplan_text: str\n",
    "\n",
    "    class Kritik(BaseModel):\n",
    "        \"\"\"\n",
    "        Debate kritik: a negative position that is often philosophical or ethical in nature,\n",
    "        challenging the underlying assumptions, frameworks, or ideologies of the affirmative case.\n",
    "        Kritiks should include a clear alternative text, which is a formalized position (like a counterplan text)\n",
    "        that usually negates or rejects the kinds of thinking or assumptions made by the affirmative.\n",
    "        The alternative text should be a specific, formal statement of what the negative advocates instead.\n",
    "        Kritiks must be distinct from disadvantages and counterplans.\n",
    "        All kritiks must be designed to negate or oppose the plan and help the negative win the round.\n",
    "        \"\"\"\n",
    "        title: str\n",
    "        core_argument_summary_as_spoken_outloud_in_debate_round: str\n",
    "        alternative_text: str\n",
    "\n",
    "    class NegativePositions(BaseModel):\n",
    "        # topicality and theory are required here even though the prompts describe\n",
    "        # them as optional (\"only if it truly makes sense\").\n",
    "        topicality: Topicality\n",
    "        theory: Theory\n",
    "        disadvantages: List[Disadvantage] = Field(..., min_items=1, max_items=1)\n",
    "        counterplans: List[Counterplan] = Field(..., min_items=1, max_items=1)\n",
    "        kritiks: List[Kritik] = Field(..., min_items=1, max_items=1)\n",
    "        rationale: str\n",
    "        advice_for_next_search: str\n",
    "\n",
    "    # NOTE(review): temperature=2.0 is very high (presumably the top of the range the\n",
    "    # OpenAI API accepts) - confirm it is intended for the structured reviewer output.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None,\n",
    "        temperature=2.0,\n",
    "        top_p=0.8\n",
    "    )\n",
    "    # tool_choice=\"required\" forces the search agent to answer with a tool call\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        parallel_tool_calls=None,\n",
    "        temperature=2.0,\n",
    "        top_p=0.8\n",
    "    )\n",
    "    # The reviewer's reply is constrained to the NegativePositions schema\n",
    "    neg_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=NegativePositions,\n",
    "        parallel_tool_calls=None,\n",
    "        temperature=2.0,\n",
    "        top_p=0.8\n",
    "    )\n",
    "\n",
    "    neg_generator = ConversableAgent(\n",
    "        name=\"neg_generator\",\n",
    "        system_message=(\n",
    "            \"You are a policy debate expert tasked with generating a set of off-case negative positions for a given affirmative case. \"\n",
    "            \"All positions you generate must be designed to negate and oppose the plan, and to help the negative win the debate round. \"\n",
    "            \"You must review the affirmative case and generate: \"\n",
    "            \"up to 1 topicality violation (only if it truly makes sense to do so), up to 1 theory argument (only if it truly makes sense to do so; theory means debate theory such as conditionality, severance, intrinsicness, etc.), at least 1 disadvantage, at least 1 counterplan (with counterplan text), and at least 1 kritik (with alternative text). \"\n",
    "            \"Kritiks are debate kritiks—positions that are often philosophical or ethical in nature, challenging the underlying assumptions, frameworks, or ideologies of the affirmative case. Each kritik must include a clear alternative text, which is a formalized position (like a counterplan text) that usually negates or rejects the kinds of thinking or assumptions made by the affirmative. \"\n",
    "            \"Only include a topicality or theory argument if there is a clear, specific, and strategic reason to do so based on the content of the affirmative case, and only if it helps the negative win the round. \"\n",
    "            \"Each position should be clearly articulated, specific to the affirmative case, and represent a classic or creative negative strategy. \"\n",
    "            \"Disadvantages, counterplans, and kritiks should be distinct and non-redundant. \"\n",
    "            \"The counterplan must include a counterplan text. The kritik must include an alternative text. \"\n",
    "            \"Strive for maximum diversity and uniqueness in the types of positions you generate, drawing on both classic policy debate categories and creative, well-supported arguments. \"\n",
    "            \"You may include progressive or novel arguments, but should also include traditional policy arguments. \"\n",
    "            \"All arguments, including theory and topicality, must be written to help the negative win the debate round and must directly oppose or undermine the plan or the affirmative's advocacy. \"\n",
    "            \"IMPORTANT: For topicality and theory, the 'core_argument_summary_as_spoken_outloud_in_debate_round' should be written as a natural language explanation of the argument, as you would say it out loud in a debate round, not as a list of 'interpretation, violation, reasons to prefer' or similar debate jargon. Instead, explain the argument in a way that would make sense to a layperson, focusing on the substance and reasoning behind the objection, as if you were speaking it in a debate round.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag or query. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    neg_reviewer = ConversableAgent(\n",
    "        name=\"neg_reviewer\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach. Your job is to review the current set of negative positions, the rationale, and the evidence gathered so far. \"\n",
    "            \"For each position, assess whether it is likely to be well-supported, strategic, and distinct. \"\n",
    "            \"Give advice for the next search iteration for how to search for evidence to improve the negative positions given the current positions, evidence, and rationale. \"\n",
    "            \"After each search, update your positions and provide rationale and advice for the next search. \"\n",
    "            \"You may suggest rewording, combining, or splitting positions as needed to maximize clarity and support. \"\n",
    "            \"Encourage the generation of highly unique and diverse negative positions, including topicality, theory (debate theory such as conditionality, severance, etc.), disadvantages, counterplans, and kritiks (which are debate kritiks—often philosophical or ethical arguments challenging the assumptions or frameworks of the affirmative). \"\n",
    "            \"For kritiks, ensure the alternative is provided as an 'alternative text'—a formalized position (like a counterplan text) that negates or rejects the assumptions or frameworks of the affirmative. \"\n",
    "            \"All positions and arguments must be written to help the negative win the debate round and must directly oppose or undermine the plan or the affirmative's advocacy. \"\n",
    "            \"IMPORTANT: For topicality and theory, the 'core_argument_summary_as_spoken_outloud_in_debate_round' should be a natural language explanation of the argument, as you would say it out loud in a debate round, not a list of 'interpretation, violation, reasons to prefer' or similar debate jargon. The explanation should focus on the substance and reasoning behind the objection, as if explaining to a layperson and as if you were speaking it in a debate round.\"\n",
    "        ),\n",
    "        llm_config=neg_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    iterations = 0  # Reviewer turns completed so far\n",
    "    MAX_ITERATIONS = 3\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Cycle search -> executor -> reviewer; stop after MAX_ITERATIONS reviews.\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # NOTE(review): neg_generator sends the opening prompt, so the first selection\n",
    "        # presumably already sees it as last_speaker and this branch may never run -\n",
    "        # confirm against the GroupChatManager behaviour.\n",
    "        if len(messages) == 0:\n",
    "            return neg_generator\n",
    "\n",
    "        if last_speaker is neg_generator:\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return neg_reviewer\n",
    "\n",
    "        if last_speaker is neg_reviewer:\n",
    "            iterations += 1\n",
    "            # Returning None ends the chat; otherwise start another search round\n",
    "            if iterations >= MAX_ITERATIONS:\n",
    "                return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[neg_generator, debate_search_agent, executor_agent, neg_reviewer],\n",
    "        messages=[],\n",
    "        max_round=50,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    neg_prompt = (\n",
    "        f\"Given the following affirmative debate case, generate a set of negative off-case positions. \"\n",
    "        f\"All positions must be designed to negate and oppose the plan, and to help the negative win the debate round. \"\n",
    "        f\"Only include a topicality violation or a theory argument if it truly makes sense to do so based on the content of the affirmative case, and only if it helps the negative win the round. \"\n",
    "        f\"(Theory means debate theory such as conditionality, severance, intrinsicness, etc. — not general philosophical theory.) \"\n",
    "        f\"At least one of each: disadvantage, counterplan (with counterplan text), and kritik (with alternative text) must be included. \"\n",
    "        f\"Kritiks are debate kritiks—positions that are often philosophical or ethical in nature, challenging the underlying assumptions, frameworks, or ideologies of the affirmative case. Each kritik must include a clear alternative text, which is a formalized position (like a counterplan text) that usually negates or rejects the kinds of thinking or assumptions made by the affirmative. \"\n",
    "        f\"Each position should be clearly articulated, specific to the affirmative case, and represent a classic or creative negative strategy. \"\n",
    "        f\"{affirmative_case}\\n\"\n",
    "        \"Strive for maximum diversity and uniqueness in the types of positions you generate, drawing on both classic policy debate categories and creative, well-supported arguments. \"\n",
    "        \"You may include progressive or novel arguments, but should also include traditional policy arguments. \"\n",
    "        \"All arguments, including theory and topicality, must be written to help the negative win the debate round and must directly oppose or undermine the plan or the affirmative's advocacy. \"\n",
    "        \"IMPORTANT: For topicality and theory, the 'core_argument_summary_as_spoken_outloud_in_debate_round' should be a natural language explanation of the argument, as you would say it out loud in a debate round, not a list of 'interpretation, violation, reasons to prefer' or similar debate jargon. Instead, explain the argument in a way that would make sense to a layperson, focusing on the substance and reasoning behind the objection, as if you were speaking it in a debate round.\"\n",
    "    )\n",
    "\n",
    "    chat_result = neg_generator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=neg_prompt,\n",
    "    )\n",
    "\n",
    "    # The chat ends right after a reviewer turn, so the last message is its JSON reply\n",
    "    neg_output_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    neg_positions = json.loads(neg_output_raw_string)\n",
    "\n",
    "    return neg_positions\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case = generate_negative_offcase(debate_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# topicality_title = negative_case['topicality']['title']\n",
    "# topicality_core_argument = negative_case['topicality']['core_argument_summary_as_spoken_outloud_in_debate_round']\n",
    "# negative_case_html = f\"<h2>{topicality_title}</h2>\\n<p>{topicality_core_argument}</p>\"\n",
    "# negative_case_html\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Topicality"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Topicality Interpretation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_topicality_interpretation_and_evidence(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Append a topicality interpretation and one supporting evidence card to the negative case.\n",
    "\n",
    "    An interpretation agent drafts the interpretation, then a search agent, an\n",
    "    executor and an evaluation agent loop over evidence searches. The most recent\n",
    "    card the evaluator approved is looked up by id and appended, together with the\n",
    "    formalized interpretation, using h2, div, and p tags.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Affirmative case text used as context in the opening message.\n",
    "        negative_case_html: Negative case HTML built so far.\n",
    "\n",
    "    Returns:\n",
    "        The updated negative_case_html string.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history holds a card approved with\n",
    "            \"include_it\".\n",
    "    \"\"\"\n",
    "    # No docstrings on the models: they define the structured-output schema.\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        formalized_topicality_interpretation: str  # Formalized topicality interpretation text\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # tool_choice=\"required\" forces the search agent to answer with a tool call\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # The evaluator's reply is constrained to the DebateCardSearchResult schema\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    topicality_interpretation_agent = ConversableAgent(\n",
    "        name=\"topicality_interpretation_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater and debate coach. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Read the provided affirmative debate case and negative case HTML.\\n\"\n",
    "            f\"2. Formulate a formalized topicality interpretation (definition of a key word or phrase in the resolution or plan) that is strategic, precise, and would help the negative win the round. \"\n",
    "            f\"3. Write the interpretation in a formal debate style, including the word/phrase being defined, the definition, and a brief standards/violation explanation. \"\n",
    "            f\"4. Suggest the best possible search query to find evidence supporting this interpretation (e.g., legal, academic, or authoritative definitions or standards).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_eval_agent = ConversableAgent(\n",
    "        name=\"topicality_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as support for a topicality interpretation in policy debate. \"\n",
    "            f\"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            f\"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            f\"- Empirical or doctrinal basis (must be supported by concrete data, legal precedent, or academic consensus)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the topicality interpretation)\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the interpretation, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the definition or standard as interpreted)\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the interpretation)\\n\\n\"\n",
    "            f\"After evaluating the evidence, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the topicality interpretation\\n\"\n",
    "            f\"4. Only approve evidence that meets ALL evaluation criteria and is strictly interpretation-relevant\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, interpretation-specific evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_search_agent = ConversableAgent(\n",
    "        name=\"topicality_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=topicality_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Evaluator replies containing \"include_it\" that are needed before the chat stops\n",
    "    REQUIRED_APPROVALS = 3\n",
    "    iterations = 0  # Approving evaluator replies seen so far\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Interpretation first, then loop search -> executor -> evaluator.\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # Only the opening message exists: let the interpretation agent answer it\n",
    "        if len(messages) <= 1:\n",
    "            return topicality_interpretation_agent\n",
    "\n",
    "        if last_speaker is topicality_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return topicality_eval_agent\n",
    "\n",
    "        if last_speaker is topicality_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= REQUIRED_APPROVALS:\n",
    "                    # Returning None ends the group chat\n",
    "                    return None\n",
    "            # Not enough approvals yet (or a rejection): search again\n",
    "            return topicality_search_agent\n",
    "\n",
    "        if last_speaker is topicality_interpretation_agent:\n",
    "            return topicality_search_agent\n",
    "\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[topicality_interpretation_agent, topicality_search_agent, executor_agent, topicality_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = topicality_interpretation_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Formulate a formalized topicality interpretation (definition of a key word or phrase in the resolution or plan) that is strategic, precise, and would help the negative win the round. \"\n",
    "            \"Write the interpretation in a formal debate style, including the word/phrase being defined, the definition, and a brief standards/violation explanation. \"\n",
    "            \"Then, find the best, most interpretation-specific evidence supporting this topicality interpretation. \"\n",
    "            \"Only consider evidence that directly and specifically supports the interpretation. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the interpretation.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result. The chat can also stop because max_round was reached, in which\n",
    "    # case the last message need not be an evaluation. Walk the history backwards to\n",
    "    # the most recent approved card instead of trusting chat_history[-1] blindly.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        content = message.get(\"content\")\n",
    "        if not isinstance(content, str):\n",
    "            continue  # e.g. messages without text content\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            continue  # free-text message, not a structured evaluation\n",
    "        cards = parsed.get(\"cards\") if isinstance(parsed, dict) else None\n",
    "        if not cards or not isinstance(cards[0], dict):\n",
    "            continue\n",
    "        if cards[0].get(\"include_in_case\") == \"include_it\":\n",
    "            card_json = cards[0]\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\"No approved topicality evidence card was found in the chat history.\")\n",
    "\n",
    "    topicality_evidence_id = card_json[\"id\"]\n",
    "    formalized_interpretation = card_json.get(\"formalized_topicality_interpretation\", \"\")\n",
    "    # Get the card document\n",
    "    topicality_evidence_doc = get_document_by_id(topicality_evidence_id)\n",
    "    card_markup = str(topicality_evidence_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Topicality Interpretation and Evidence</h2>\"\n",
    "        f\"\\n<div><p>{formalized_interpretation}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_topicality_interpretation_and_evidence(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Topicality Violation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_topicality_violation(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Append a formalized topicality violation to the negative case HTML.\n",
    "\n",
    "    Runs a group chat (violation writer -> search agent -> tool executor -> evaluator)\n",
    "    seeded with the affirmative case and the negative case built so far, reads the\n",
    "    evaluator's structured JSON output, and appends the violation text under a\n",
    "    'Topicality Violation' h2 heading. The evidence card picked during the chat is\n",
    "    intentionally NOT added to the HTML, so it is not looked up either.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative debate case, as a string.\n",
    "        negative_case_html: The negative case HTML so far (expected to already contain\n",
    "            the topicality interpretation).\n",
    "\n",
    "    Returns:\n",
    "        The negative_case_html string with the violation section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history carries a structured card\n",
    "            (for example when the chat stopped before the evaluator produced one).\n",
    "    \"\"\"\n",
    "    class TopicalityViolationCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        formalized_topicality_violation: str  # Formalized topicality violation text\n",
    "\n",
    "    class TopicalityViolationSearchResult(BaseModel):\n",
    "        cards: List[TopicalityViolationCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Plain config: used by the violation writer, the executor and the chat manager.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Search agent config: tool_choice=\"required\" so every turn is a search tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Evaluator config: replies are constrained to the TopicalityViolationSearchResult schema.\n",
    "    violation_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=TopicalityViolationSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    topicality_violation_agent = ConversableAgent(\n",
    "        name=\"topicality_violation_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater and debate coach. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Read the provided affirmative debate case and negative case HTML (which already contains the topicality interpretation).\\n\"\n",
    "            f\"2. Formulate a formalized topicality violation, explaining exactly how the affirmative's plan or advocacy violates the negative's interpretation. \"\n",
    "            f\"3. Write the violation in a formal debate style, including a clear explanation of the violation and a brief reference to the standards/impacts. \"\n",
    "            f\"4. Suggest the best possible search query to find evidence or examples supporting this violation (e.g., plan text, advocacy statements, or authoritative analysis showing the violation).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_violation_eval_agent = ConversableAgent(\n",
    "        name=\"topicality_violation_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether the violation explanation and any supporting evidence meet the highest standards for inclusion as support for a topicality violation in policy debate. \"\n",
    "            f\"For each piece of evidence or explanation, meticulously scrutinize its:\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly show how the affirmative violates the interpretation)\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the violation, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the violation as interpreted)\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the violation)\\n\\n\"\n",
    "            f\"After evaluating, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the topicality violation\\n\"\n",
    "            f\"4. Only approve evidence/explanations that meet ALL evaluation criteria and are strictly violation-relevant\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, interpretation-specific violation explanation, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=violation_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_violation_search_agent = ConversableAgent(\n",
    "        name=\"topicality_violation_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes search_debate_cards calls; the executor agent runs them.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=topicality_violation_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of evaluator messages so far that contained 'include_it'.\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"\n",
    "        Route the chat: writer -> search -> executor -> evaluator -> search -> ...\n",
    "\n",
    "        Returns None once the evaluator has produced three messages containing\n",
    "        'include_it'; otherwise the evaluator always hands back to the search agent.\n",
    "        \"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return topicality_violation_agent\n",
    "\n",
    "        if last_speaker is topicality_violation_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return topicality_violation_eval_agent\n",
    "\n",
    "        if last_speaker is topicality_violation_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return topicality_violation_search_agent\n",
    "            else:\n",
    "                return topicality_violation_search_agent\n",
    "\n",
    "        if last_speaker is topicality_violation_agent:\n",
    "            return topicality_violation_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[topicality_violation_agent, topicality_violation_search_agent, executor_agent, topicality_violation_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = topicality_violation_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Formulate a formalized topicality violation, explaining exactly how the affirmative's plan or advocacy violates the negative's interpretation. \"\n",
    "            \"Write the violation in a formal debate style, including a clear explanation of the violation and a brief reference to the standards/impacts. \"\n",
    "            \"Then, find the best, most interpretation-specific evidence or example supporting this topicality violation. \"\n",
    "            \"Only consider evidence or examples that directly and specifically show the violation. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the violation.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result. Walk the history backwards: when the chat stops on max_round the\n",
    "    # final message may not be evaluator JSON, so fall back to the most recent message\n",
    "    # that does carry a structured card instead of crashing inside json.loads.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        try:\n",
    "            candidate_cards = json.loads(message.get(\"content\") or \"\")[\"cards\"]\n",
    "        except (ValueError, TypeError, KeyError):\n",
    "            continue  # content is not a JSON object with a \"cards\" entry\n",
    "        if isinstance(candidate_cards, list) and candidate_cards and isinstance(candidate_cards[0], dict):\n",
    "            card_json = candidate_cards[0]\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No structured topicality violation card was found in the chat history; \"\n",
    "            \"try rerunning the cell.\"\n",
    "        )\n",
    "    formalized_violation = card_json.get(\"formalized_topicality_violation\", \"\")\n",
    "\n",
    "    # Append to negative_case_html using h2 and div, p tags (but do NOT include the evidence/card_markup)\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Topicality Violation</h2>\"\n",
    "        f\"\\n<div><p>{formalized_violation}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_topicality_violation(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Topicality Reasons to Prefer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_topicality_reasons_to_prefer_and_evidence(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Append three formalized topicality 'reasons to prefer' to the negative case HTML.\n",
    "\n",
    "    Runs a group chat (reasons writer -> search agent -> tool executor -> evaluator)\n",
    "    seeded with the affirmative case and the negative case built so far (which should\n",
    "    already contain the formal interpretation and violation). The evaluator's structured\n",
    "    JSON output carries the three reasons, which are appended as an ordered list under a\n",
    "    'Topicality Reasons to Prefer' h2 heading. The supporting evidence card found during\n",
    "    the chat is NOT added to the HTML, and the reasons are prompted not to mention it.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative debate case, as a string.\n",
    "        negative_case_html: The negative case HTML so far.\n",
    "\n",
    "    Returns:\n",
    "        The negative_case_html string with the reasons-to-prefer section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history carries a structured card\n",
    "            (for example when the chat stopped before the evaluator produced one).\n",
    "    \"\"\"\n",
    "    from typing import List, Literal\n",
    "    from pydantic import BaseModel, Field\n",
    "\n",
    "    class TopicalityReasonCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        detailed_reasons_to_prefer_arguments_as_delivered_in_debate: List[str] = Field(\n",
    "            ..., min_items=3, max_items=3\n",
    "        )  # 3 long, detailed, debate-style reasons using classic topicality terms\n",
    "\n",
    "    class TopicalityReasonCardSearchResult(BaseModel):\n",
    "        cards: List[TopicalityReasonCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Plain config: used by the reasons writer, the executor and the chat manager.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Search agent config: tool_choice=\"required\" so every turn is a search tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Evaluator config: replies are constrained to the TopicalityReasonCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=TopicalityReasonCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    topicality_reasons_agent = ConversableAgent(\n",
    "        name=\"topicality_reasons_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater and debate coach. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Read the provided affirmative debate case and negative case HTML (which already contains the formal interpretation and violation).\\n\"\n",
    "            f\"2. Generate exactly three distinct, formalized 'reasons to prefer' the negative's interpretation, using classic policy debate topicality terminology such as 'competing interpretations', 'education', 'ground', 'predictability', 'limits', 'fairness', 'reasonability', etc. \"\n",
    "            f\"Each reason should be long, detailed, strategic, and written in a formal debate style, thoroughly explaining the rationale, impact, and strategic value of the reason in the context of policy debate theory. Each reason should be at least 5-7 sentences and include specific examples, impacts, and theoretical warrants. These should be the kinds of arguments a debater would actually deliver out loud in a round, using the above topicality terms as the core of each reason.\\n\"\n",
    "            f\"Do NOT reference or mention any evidence or cards in the reasons to prefer. The reasons should stand alone as arguments, without referring to any evidence that may be gathered later.\\n\"\n",
    "            f\"3. Suggest the best possible search query to find evidence supporting these reasons to prefer, using debate topicality terminology (e.g., 'competing interpretations standard', 'education standard topicality', 'ground limits topicality', etc).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_eval_agent = ConversableAgent(\n",
    "        name=\"topicality_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as support for reasons to prefer a topicality interpretation in policy debate. \"\n",
    "            f\"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            f\"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            f\"- Empirical or doctrinal basis (must be supported by concrete data, legal precedent, or academic consensus)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the reasons to prefer, using debate topicality terminology)\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the reasons, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the reason as interpreted)\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the reason)\\n\\n\"\n",
    "            f\"After evaluating the evidence, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the reasons to prefer\\n\"\n",
    "            f\"4. Only approve evidence that meets ALL evaluation criteria and is strictly relevant to the reasons to prefer\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, reason-to-prefer-specific evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    topicality_search_agent = ConversableAgent(\n",
    "        name=\"topicality_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. \"\n",
    "            \"When searching for evidence to support reasons to prefer a topicality interpretation, \"\n",
    "            \"use classic policy debate topicality terminology in your queries, such as 'competing interpretations', 'education', 'ground', 'predictability', 'limits', 'fairness', 'reasonability', etc. \"\n",
    "            \"Your query will retrieve a list of debate cards.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes search_debate_cards calls; the executor agent runs them.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=topicality_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of evaluator messages so far that contained 'include_it'.\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"\n",
    "        Route the chat: writer -> search -> executor -> evaluator -> search -> ...\n",
    "\n",
    "        Returns None once the evaluator has produced three messages containing\n",
    "        'include_it'; otherwise the evaluator always hands back to the search agent.\n",
    "        \"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return topicality_reasons_agent\n",
    "\n",
    "        if last_speaker is topicality_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return topicality_eval_agent\n",
    "\n",
    "        if last_speaker is topicality_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return topicality_search_agent\n",
    "            else:\n",
    "                return topicality_search_agent\n",
    "\n",
    "        if last_speaker is topicality_reasons_agent:\n",
    "            return topicality_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[topicality_reasons_agent, topicality_search_agent, executor_agent, topicality_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = topicality_reasons_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Given the formal interpretation and violation already included above, generate exactly three distinct, formalized 'reasons to prefer' the negative's interpretation, using classic policy debate topicality terminology such as 'competing interpretations', 'education', 'ground', 'predictability', 'limits', 'fairness', 'reasonability', etc. \"\n",
    "            \"Each reason should be long, detailed, strategic, and written in a formal debate style, thoroughly explaining the rationale, impact, and strategic value of the reason in the context of policy debate theory. Each reason should be at least 5-7 sentences and include specific examples, impacts, and theoretical warrants. These should be the kinds of arguments a debater would actually deliver out loud in a round, using the above topicality terms as the core of each reason. \"\n",
    "            \"Do NOT reference or mention any evidence or cards in the reasons to prefer. The reasons should stand alone as arguments, without referring to any evidence that may be gathered later. \"\n",
    "            \"Then, find the best, most reason-to-prefer-specific evidence supporting these reasons. \"\n",
    "            \"Only consider evidence that directly and specifically supports the reasons to prefer. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the reasons to prefer.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result. Walk the history backwards: when the chat stops on max_round the\n",
    "    # final message may not be evaluator JSON, so fall back to the most recent message\n",
    "    # that does carry a structured card instead of crashing inside json.loads.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        try:\n",
    "            candidate_cards = json.loads(message.get(\"content\") or \"\")[\"cards\"]\n",
    "        except (ValueError, TypeError, KeyError):\n",
    "            continue  # content is not a JSON object with a \"cards\" entry\n",
    "        if isinstance(candidate_cards, list) and candidate_cards and isinstance(candidate_cards[0], dict):\n",
    "            card_json = candidate_cards[0]\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No structured topicality reasons-to-prefer card was found in the chat history; \"\n",
    "            \"try rerunning the cell.\"\n",
    "        )\n",
    "    # Only the reasons are rendered; the evidence card itself (card_json[\"id\"]) is\n",
    "    # deliberately not looked up or added to the HTML.\n",
    "    detailed_reasons_to_prefer_arguments_as_delivered_in_debate = card_json.get(\n",
    "        \"detailed_reasons_to_prefer_arguments_as_delivered_in_debate\", []\n",
    "    )\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags (do NOT add the card itself)\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Topicality Reasons to Prefer</h2>\"\n",
    "        f\"\\n<div><ol>\"\n",
    "        + \"\".join(f\"<li>{reason}</li>\" for reason in detailed_reasons_to_prefer_arguments_as_delivered_in_debate)\n",
    "        + \"</ol></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_topicality_reasons_to_prefer_and_evidence(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Theory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#theory_title = negative_case['theory']['title']\n",
    "#theory_core_argument = negative_case['theory']['core_argument']\n",
    "#negative_case_html += f\"<h2>{theory_title}</h2>\\n<p>{theory_core_argument}</p>\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Theory Interpretation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_theory_interpretation_and_evidence(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Append a formalized theory interpretation and its evidence card to the negative case HTML.\n",
    "\n",
    "    Runs a group chat (interpretation writer -> search agent -> tool executor -> evaluator)\n",
    "    seeded with the affirmative case and the negative case built so far, reads the\n",
    "    evaluator's structured JSON output, fetches the selected card with\n",
    "    get_document_by_id, and appends both the interpretation text and the card's markup\n",
    "    under a 'Theory Interpretation and Evidence' h2 heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative debate case, as a string.\n",
    "        negative_case_html: The negative case HTML so far.\n",
    "\n",
    "    Returns:\n",
    "        The negative_case_html string with the theory interpretation section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history carries a structured card\n",
    "            (for example when the chat stopped before the evaluator produced one).\n",
    "        KeyError: If the selected card has no 'id' entry.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        formalized_theory_interpretation: str  # Formalized theory interpretation text\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Plain config: used by the interpretation writer, the executor and the chat manager.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Search agent config: tool_choice=\"required\" so every turn is a search tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Evaluator config: replies are constrained to the DebateCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    theory_interpretation_agent = ConversableAgent(\n",
    "        name=\"theory_interpretation_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater and debate coach. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Read the provided affirmative debate case and negative case HTML.\\n\"\n",
    "            f\"2. Formulate a formalized theory interpretation (definition of a debate theory concept, such as conditionality, severance, or specification) that is strategic, precise, and would help the negative win the round. \"\n",
    "            f\"3. Write the interpretation in a formal debate style, including the concept being defined, the definition, and a brief standards/violation explanation. \"\n",
    "            f\"4. Suggest the best possible search query to find evidence supporting this theory interpretation (e.g., debate theory articles, academic sources, or authoritative debate handbooks).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    theory_eval_agent = ConversableAgent(\n",
    "        name=\"theory_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as support for a theory interpretation in policy debate. \"\n",
    "            f\"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            f\"- Author qualifications (must be from recognized debate theorists, coaches, or authoritative sources)\\n\"\n",
    "            f\"- Empirical or doctrinal basis (must be supported by concrete debate practice, academic consensus, or published theory literature)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the theory interpretation)\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the interpretation, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the definition or standard as interpreted)\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the interpretation)\\n\\n\"\n",
    "            f\"After evaluating the evidence, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the theory interpretation\\n\"\n",
    "            f\"4. Only approve evidence that meets ALL evaluation criteria and is strictly interpretation-relevant\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, interpretation-specific evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    theory_search_agent = ConversableAgent(\n",
    "        name=\"theory_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes search_debate_cards calls; the executor agent runs them.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=theory_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of evaluator messages so far that contained 'include_it'.\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"\n",
    "        Route the chat: writer -> search -> executor -> evaluator -> search -> ...\n",
    "\n",
    "        Returns None once the evaluator has produced three messages containing\n",
    "        'include_it'; otherwise the evaluator always hands back to the search agent.\n",
    "        \"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return theory_interpretation_agent\n",
    "\n",
    "        if last_speaker is theory_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return theory_eval_agent\n",
    "\n",
    "        if last_speaker is theory_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return theory_search_agent\n",
    "            else:\n",
    "                return theory_search_agent\n",
    "\n",
    "        if last_speaker is theory_interpretation_agent:\n",
    "            return theory_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[theory_interpretation_agent, theory_search_agent, executor_agent, theory_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = theory_interpretation_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Formulate a formalized theory interpretation (definition of a debate theory concept, such as conditionality, severance, or specification) that is strategic, precise, and would help the negative win the round. \"\n",
    "            \"Write the interpretation in a formal debate style, including the concept being defined, the definition, and a brief standards/violation explanation. \"\n",
    "            \"Then, find the best, most interpretation-specific evidence supporting this theory interpretation. \"\n",
    "            \"Only consider evidence that directly and specifically supports the interpretation. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the interpretation.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result. Walk the history backwards: when the chat stops on max_round the\n",
    "    # final message may not be evaluator JSON, so fall back to the most recent message\n",
    "    # that does carry a structured card instead of crashing inside json.loads.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        try:\n",
    "            candidate_cards = json.loads(message.get(\"content\") or \"\")[\"cards\"]\n",
    "        except (ValueError, TypeError, KeyError):\n",
    "            continue  # content is not a JSON object with a \"cards\" entry\n",
    "        if isinstance(candidate_cards, list) and candidate_cards and isinstance(candidate_cards[0], dict):\n",
    "            card_json = candidate_cards[0]\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No structured theory interpretation card was found in the chat history; \"\n",
    "            \"try rerunning the cell.\"\n",
    "        )\n",
    "    theory_evidence_id = card_json[\"id\"]\n",
    "    formalized_interpretation = card_json.get(\"formalized_theory_interpretation\", \"\")\n",
    "    # Get the card document\n",
    "    theory_evidence_doc = get_document_by_id(theory_evidence_id)\n",
    "    card_markup = str(theory_evidence_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Theory Interpretation and Evidence</h2>\"\n",
    "        f\"\\n<div><p>{formalized_interpretation}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_theory_interpretation_and_evidence(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Theory Violation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_theory_violation_and_grounding_evidence(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Draft a formalized theory violation argument for the negative and append it to the case.\n",
    "\n",
    "    A group chat is run between four agents: one drafts the violation argument, one issues\n",
    "    `search_debate_cards` queries, one executes the search tool, and one evaluates the returned\n",
    "    cards and replies in the `DebateCardSearchResult` schema. Only the violation text taken from\n",
    "    the evaluator's verdict is appended; the evidence card itself is not added to the HTML.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Affirmative case text, sent at the start of the opening chat message.\n",
    "        negative_case_html: Negative case HTML built so far, sent as context and extended.\n",
    "\n",
    "    Returns:\n",
    "        The negative case HTML with a 'Theory Violation Argument' section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history holds a parseable evaluator verdict.\n",
    "    \"\"\"\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # The chat is stopped once the evaluator has approved this many cards.\n",
    "    max_accepted_cards = 3\n",
    "\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        formalized_theory_violation: str  # Formalized theory violation text\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Settings shared by every agent in this chat.\n",
    "    base_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None,\n",
    "    )\n",
    "    llm_config = LLMConfig(**base_llm_settings)\n",
    "    # Search agent: tool_choice=\"required\" is set so its replies are tool calls.\n",
    "    required_llm_config = LLMConfig(\n",
    "        **base_llm_settings,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "    )\n",
    "    # Evaluator: replies are requested in the DebateCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        **base_llm_settings,\n",
    "        response_format=DebateCardSearchResult,\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    theory_violation_agent = ConversableAgent(\n",
    "        name=\"theory_violation_agent\",\n",
    "        system_message=(\n",
    "            \"You are an expert policy debater and debate coach. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Read the provided affirmative debate case and negative case HTML.\\n\"\n",
    "            \"2. Formulate a formalized theory violation argument (not just an interpretation, but a violation argument including the interpretation, the violation, and standards) that is strategic, precise, and would help the negative win the round. \"\n",
    "            \"3. Write the violation argument in a formal debate style, including the concept being defined, the interpretation, the violation (how the affirmative violates the interpretation), and a brief standards explanation. \"\n",
    "            \"4. Suggest the best possible search query to find evidence supporting this theory violation (e.g., debate theory articles, academic sources, or authoritative debate handbooks).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    theory_eval_agent = ConversableAgent(\n",
    "        name=\"theory_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as support for a theory violation in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized debate theorists, coaches, or authoritative sources)\\n\"\n",
    "            \"- Empirical or doctrinal basis (must be supported by concrete debate practice, academic consensus, or published theory literature)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the theory violation argument)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the violation, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the violation or standard as interpreted)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the violation)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the theory violation\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly violation-relevant\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, violation-specific evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    theory_search_agent = ConversableAgent(\n",
    "        name=\"theory_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=theory_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    accepted_cards = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route the chat: violation -> search -> executor -> evaluator -> search ...\"\"\"\n",
    "        nonlocal accepted_cards\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return theory_violation_agent\n",
    "        if last_speaker is theory_search_agent:\n",
    "            return executor_agent\n",
    "        if last_speaker is executor_agent:\n",
    "            return theory_eval_agent\n",
    "        if last_speaker is theory_eval_agent:\n",
    "            # Guard against a missing/None content so the membership test cannot raise.\n",
    "            if \"include_it\" in (messages[-1].get(\"content\") or \"\"):\n",
    "                accepted_cards += 1\n",
    "                if accepted_cards >= max_accepted_cards:\n",
    "                    return None  # returning None ends the chat\n",
    "            return theory_search_agent\n",
    "        if last_speaker is theory_violation_agent:\n",
    "            return theory_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    def _latest_verdict_card(chat_history):\n",
    "        \"\"\"Return the newest evaluator card in chat_history, preferring an approved one.\"\"\"\n",
    "        newest_card = None\n",
    "        for message in reversed(chat_history):\n",
    "            content = message.get(\"content\")\n",
    "            if not isinstance(content, str):\n",
    "                continue  # skip messages without text content\n",
    "            try:\n",
    "                card = json.loads(content)[\"cards\"][0]\n",
    "            except (ValueError, KeyError, IndexError, TypeError):\n",
    "                continue  # not a structured evaluator verdict\n",
    "            if not isinstance(card, dict):\n",
    "                continue\n",
    "            if card.get(\"include_in_case\") == \"include_it\":\n",
    "                return card\n",
    "            if newest_card is None:\n",
    "                newest_card = card\n",
    "        if newest_card is None:\n",
    "            raise ValueError(\n",
    "                \"No parseable evaluator verdict was found in the theory violation chat history.\"\n",
    "            )\n",
    "        return newest_card\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[theory_violation_agent, theory_search_agent, executor_agent, theory_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func,\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = theory_violation_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Formulate a formalized theory violation argument (not just an interpretation, but a violation argument including the interpretation, the violation, and standards) that is strategic, precise, and would help the negative win the round. \"\n",
    "            \"Write the violation argument in a formal debate style, including the concept being defined, the interpretation, the violation (how the affirmative violates the interpretation), and a brief standards explanation. \"\n",
    "            \"Then, find the best, most violation-specific evidence supporting this theory violation. \"\n",
    "            \"Only consider evidence that directly and specifically supports the violation. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the violation.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat can end on a non-verdict message (e.g. when max_round is reached), so the\n",
    "    # history is searched for the evaluator's verdict instead of trusting the last message.\n",
    "    card_json = _latest_verdict_card(chat_result.chat_history)\n",
    "    formalized_violation = card_json.get(\"formalized_theory_violation\", \"\")\n",
    "\n",
    "    # Append only the violation argument to negative_case_html using h2, div, and p tags.\n",
    "    # The evidence card is not looked up here because its markup is never written out.\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Theory Violation Argument</h2>\"\n",
    "        f\"\\n<div><p>{formalized_violation}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_theory_violation_and_grounding_evidence(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Theory Reasons to Prefer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_theory_reasons_to_prefer_and_evidence(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Generate three 'reasons to prefer' the negative's theory interpretation and append them.\n",
    "\n",
    "    A group chat is run between four agents: one drafts the reasons, one issues\n",
    "    `search_debate_cards` queries, one executes the search tool, and one evaluates the returned\n",
    "    cards and replies in the `TheoryReasonCardSearchResult` schema, which carries exactly three\n",
    "    reasons. Only the reasons are appended (as an ordered list); the evidence card is not added.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Affirmative case text, sent at the start of the opening chat message.\n",
    "        negative_case_html: Negative case HTML built so far (the prompts assume it already\n",
    "            contains the interpretation and violation), sent as context and extended.\n",
    "\n",
    "    Returns:\n",
    "        The negative case HTML with a 'Theory Reasons to Prefer' section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history holds a parseable evaluator verdict.\n",
    "    \"\"\"\n",
    "    from typing import List, Literal\n",
    "    from pydantic import BaseModel, Field\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # The chat is stopped once the evaluator has approved this many cards.\n",
    "    max_accepted_cards = 3\n",
    "\n",
    "    class TheoryReasonCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        detailed_reasons_to_prefer_arguments_as_delivered_in_debate: List[str] = Field(\n",
    "            ..., min_items=3, max_items=3\n",
    "        )  # 3 long, detailed, debate-style reasons using classic theory terms\n",
    "\n",
    "    class TheoryReasonCardSearchResult(BaseModel):\n",
    "        cards: List[TheoryReasonCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Settings shared by every agent in this chat.\n",
    "    base_llm_settings = dict(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None,\n",
    "    )\n",
    "    llm_config = LLMConfig(**base_llm_settings)\n",
    "    # Search agent: tool_choice=\"required\" is set so its replies are tool calls.\n",
    "    required_llm_config = LLMConfig(\n",
    "        **base_llm_settings,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "    )\n",
    "    # Evaluator: replies are requested in the TheoryReasonCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        **base_llm_settings,\n",
    "        response_format=TheoryReasonCardSearchResult,\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    theory_reasons_agent = ConversableAgent(\n",
    "        name=\"theory_reasons_agent\",\n",
    "        system_message=(\n",
    "            \"You are an expert policy debater and debate coach. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Read the provided affirmative debate case and negative case HTML (which already contains the formal interpretation and violation).\\n\"\n",
    "            \"2. Generate exactly three distinct, formalized 'reasons to prefer' the negative's theory interpretation, using classic policy debate theory terminology such as 'competing interpretations', 'abuse', 'predictability', 'education', 'fairness', 'jurisdiction', 'brightline', 'limits', 'ground', etc. \"\n",
    "            \"Each reason should be long, detailed, strategic, and written in a formal debate style, thoroughly explaining the rationale, impact, and strategic value of the reason in the context of policy debate theory. Each reason should be at least 5-7 sentences and include specific examples, impacts, and theoretical warrants. These should be the kinds of arguments a debater would actually deliver out loud in a round, using the above theory terms as the core of each reason.\\n\"\n",
    "            \"Do NOT reference or mention any evidence or cards in the reasons to prefer. The reasons should stand alone as arguments, without referring to any evidence that may be gathered later.\\n\"\n",
    "            \"3. Suggest the best possible search query to find evidence supporting these reasons to prefer, using debate theory terminology (e.g., 'competing interpretations standard', 'abuse standard theory', 'predictability fairness theory', etc).\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    theory_eval_agent = ConversableAgent(\n",
    "        name=\"theory_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as support for reasons to prefer a theory interpretation in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical or doctrinal basis (must be supported by concrete data, legal precedent, or academic consensus)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the reasons to prefer, using debate theory terminology)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the reasons, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the reason as interpreted)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the reason)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the reasons to prefer\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly relevant to the reasons to prefer\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, reason-to-prefer-specific evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    theory_search_agent = ConversableAgent(\n",
    "        name=\"theory_search_agent\",\n",
    "        system_message=(\n",
    "            \"You are a helpful assistant that can search the debate evidence dataset for a given tag. \"\n",
    "            \"When searching for evidence to support reasons to prefer a theory interpretation, \"\n",
    "            \"use classic policy debate theory terminology in your queries, such as 'competing interpretations', 'abuse', 'predictability', 'education', 'fairness', 'jurisdiction', 'brightline', 'limits', 'ground', etc. \"\n",
    "            \"Your query will retrieve a list of debate cards.\"\n",
    "        ),\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=theory_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    accepted_cards = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route the chat: reasons -> search -> executor -> evaluator -> search ...\"\"\"\n",
    "        nonlocal accepted_cards\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return theory_reasons_agent\n",
    "        if last_speaker is theory_search_agent:\n",
    "            return executor_agent\n",
    "        if last_speaker is executor_agent:\n",
    "            return theory_eval_agent\n",
    "        if last_speaker is theory_eval_agent:\n",
    "            # Guard against a missing/None content so the membership test cannot raise.\n",
    "            if \"include_it\" in (messages[-1].get(\"content\") or \"\"):\n",
    "                accepted_cards += 1\n",
    "                if accepted_cards >= max_accepted_cards:\n",
    "                    return None  # returning None ends the chat\n",
    "            return theory_search_agent\n",
    "        if last_speaker is theory_reasons_agent:\n",
    "            return theory_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    def _latest_verdict_card(chat_history):\n",
    "        \"\"\"Return the newest evaluator card in chat_history, preferring an approved one.\"\"\"\n",
    "        newest_card = None\n",
    "        for message in reversed(chat_history):\n",
    "            content = message.get(\"content\")\n",
    "            if not isinstance(content, str):\n",
    "                continue  # skip messages without text content\n",
    "            try:\n",
    "                card = json.loads(content)[\"cards\"][0]\n",
    "            except (ValueError, KeyError, IndexError, TypeError):\n",
    "                continue  # not a structured evaluator verdict\n",
    "            if not isinstance(card, dict):\n",
    "                continue\n",
    "            if card.get(\"include_in_case\") == \"include_it\":\n",
    "                return card\n",
    "            if newest_card is None:\n",
    "                newest_card = card\n",
    "        if newest_card is None:\n",
    "            raise ValueError(\n",
    "                \"No parseable evaluator verdict was found in the theory reasons chat history.\"\n",
    "            )\n",
    "        return newest_card\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[theory_reasons_agent, theory_search_agent, executor_agent, theory_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func,\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case and negative_case_html for context\n",
    "    chat_result = theory_reasons_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Given the formal interpretation and violation already included above, generate exactly three distinct, formalized 'reasons to prefer' the negative's theory interpretation, using classic policy debate theory terminology such as 'competing interpretations', 'abuse', 'predictability', 'education', 'fairness', 'jurisdiction', 'brightline', 'limits', 'ground', etc. \"\n",
    "            \"Each reason should be long, detailed, strategic, and written in a formal debate style, thoroughly explaining the rationale, impact, and strategic value of the reason in the context of policy debate theory. Each reason should be at least 5-7 sentences and include specific examples, impacts, and theoretical warrants. These should be the kinds of arguments a debater would actually deliver out loud in a round, using the above theory terms as the core of each reason. \"\n",
    "            \"Do NOT reference or mention any evidence or cards in the reasons to prefer. The reasons should stand alone as arguments, without referring to any evidence that may be gathered later. \"\n",
    "            \"Then, find the best, most reason-to-prefer-specific evidence supporting these reasons. \"\n",
    "            \"Only consider evidence that directly and specifically supports the reasons to prefer. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the reasons to prefer.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat can end on a non-verdict message (e.g. when max_round is reached), so the\n",
    "    # history is searched for the evaluator's verdict instead of trusting the last message.\n",
    "    card_json = _latest_verdict_card(chat_result.chat_history)\n",
    "    # A missing or null list is treated as 'no reasons' so the join below cannot fail.\n",
    "    reasons_to_prefer = card_json.get(\n",
    "        \"detailed_reasons_to_prefer_arguments_as_delivered_in_debate\", []\n",
    "    ) or []\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and an ordered list (do NOT add the card itself)\n",
    "    reason_items = \"\".join(f\"<li>{reason}</li>\" for reason in reasons_to_prefer)\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Theory Reasons to Prefer</h2>\"\n",
    "        f\"\\n<div><ol>{reason_items}</ol></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_theory_reasons_to_prefer_and_evidence(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Disadvantages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#disadvantage_title = negative_case['disadvantages'][0]['title']\n",
    "#disadvantage_core_argument = negative_case['disadvantages'][0]['core_argument']\n",
    "#negative_case_html += f\"<h2>{disadvantage_title}</h2>\\n<p>{disadvantage_core_argument}</p>\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Disadvantage Uniqueness"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_disadvantage_uniqueness_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Given a debate_case string and a negative_case_html string,\n",
    "    finds the best uniqueness evidence for the disadvantage and appends it to the negative_case_html using h2, div, and p tags.\n",
    "    The output includes the retagged argument as read out loud in the debate round and the card itself, but not the reason.\n",
    "    Returns the modified negative_case_html string.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            f\"You are an expert policy debater focused on finding the best possible evidence of uniqueness for a specific disadvantage in a policy debate case. \"\n",
    "            f\"Your job is to:\\n\"\n",
    "            f\"1. Break down the plan, link, internal link, and impact of the disadvantage into their key components and causal relationships.\\n\"\n",
    "            f\"2. Guide evidence collection by:\\n\"\n",
    "            f\"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the current status quo regarding the impact area of the disadvantage (i.e., uniqueness). \"\n",
    "            f\"   - Using BM25 search to find relevant cards from a debate evidence database (cutoff year 2022).\\n\"\n",
    "            f\"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific uniqueness evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            f\"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the uniqueness claim for the disadvantage.\\n\"\n",
    "            f\"3. Evaluate evidence quality for:\\n\"\n",
    "            f\"   - Direct, explicit support for the uniqueness claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            f\"   - Specificity: The evidence must establish the current state of affairs in the impact area of the disadvantage, and explain why the impact is not already occurring or is not inevitable absent the plan.\\n\"\n",
    "            f\"   - Empirical support and authoritativeness.\\n\"\n",
    "            f\"Reject any evidence that does not fully and directly support the uniqueness of the disadvantage or that could be interpreted as generic or non-specific. \"\n",
    "            f\"Your goal is to find the strictest, most disadvantage- and plan-relevant uniqueness evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            f\"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            f\"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as uniqueness evidence supporting a specific disadvantage in policy debate. \"\n",
    "            f\"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            f\"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            f\"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            f\"- Direct relevance (must precisely and explicitly support the uniqueness claim for the disadvantage)\\n\"\n",
    "            f\"- Strategic value (must provide unique and compelling support for the disadvantage's uniqueness, not just generic background)\\n\"\n",
    "            f\"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the current state of affairs in the impact area of the disadvantage)\\n\"\n",
    "            f\"- Wording precision (must use exact terminology needed to establish the uniqueness link for the disadvantage)\\n\\n\"\n",
    "            f\"After evaluating the evidence, you must:\\n\"\n",
    "            f\"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            f\"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            f\"3. Ensure terminology precisely matches what's needed for the disadvantage's uniqueness link chains\\n\"\n",
    "            f\"4. Only approve evidence that meets ALL evaluation criteria and is strictly disadvantage- and plan-relevant for uniqueness\\n\\n\"\n",
    "            f\"Your goal is to ensure we have the highest quality, disadvantage- and plan-specific uniqueness evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Add the negative case here\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            f\"Find the best, most plan- and disadvantage-specific evidence of uniqueness supporting the disadvantage articulated above. \"\n",
    "            f\"Only consider evidence that directly and specifically supports the claim that the impact area of the disadvantage is not already occurring, is not inevitable, or is not being solved in the status quo. \"\n",
    "            f\"Reject any evidence that is generic, tangential, or not relevant to the uniqueness of the disadvantage. \"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Parse the result\n",
    "    disadvantage_uniqueness_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    card_json = json.loads(disadvantage_uniqueness_raw_string)[\"cards\"][0]\n",
    "    disadvantage_uniqueness_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "    # Get the card document\n",
    "    disadvantage_uniqueness_doc = get_document_by_id(disadvantage_uniqueness_id)\n",
    "    card_markup = str(disadvantage_uniqueness_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Disadvantage Uniqueness</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#negative_case_html = add_disadvantage_uniqueness_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Disadvantage Link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_disadvantage_link_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"Find one link card for the disadvantage and append it to the negative case.\n",
    "\n",
    "    Runs a group chat of four agents (query planner, search caller, tool executor\n",
    "    and structured-output evaluator) that searches the debate evidence dataset via\n",
    "    search_debate_cards, then appends the chosen card's retagged argument and its\n",
    "    stored markup under a 'Disadvantage Link' heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Case context; interpolated at the top of the opening prompt.\n",
    "        negative_case_html: HTML of the negative case built so far; also sent to the\n",
    "            agents as context.\n",
    "\n",
    "    Returns:\n",
    "        negative_case_html with the 'Disadvantage Link' section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no evaluator verdict can be parsed from the chat history.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Tag for this card as it would be read aloud in the round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        # Exactly one card per evaluator reply.\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Forces the search agent to always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Constrains the evaluator's reply to the DebateCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of a causal link for the disadvantage in a policy debate case. \"\n",
    "            \"The debate topic, plan, and disadvantage (including uniqueness evidence) have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and disadvantage (including uniqueness) into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the plan, if enacted, would cause the impact described in the disadvantage (i.e., link evidence). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific link evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the link claim for the disadvantage.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the link claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the plan, as proposed, will cause the impact described in the disadvantage, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the link between the plan and the impact of the disadvantage, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most disadvantage- and plan-relevant link evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as link evidence supporting the disadvantage in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the link claim for the disadvantage)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the disadvantage's link, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the plan, as proposed, will cause the impact described in the disadvantage)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the link for the disadvantage)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the disadvantage's link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly disadvantage- and plan-relevant for the link\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, disadvantage- and plan-specific link evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # debate_search_agent proposes the tool call; executor_agent actually runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    required_approvals = 3  # end the chat after this many evaluator replies containing 'include_it'\n",
    "    max_rounds = 40  # hard cap on group chat turns\n",
    "    approvals = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route turns: planner -> search -> executor -> evaluator -> search ...; None ends the chat.\"\"\"\n",
    "        nonlocal approvals\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # content may be None on non-text messages; treat that as 'no approval'.\n",
    "            if \"include_it\" in (messages[-1].get(\"content\") or \"\"):\n",
    "                approvals += 1\n",
    "                print(f\"iterations: {approvals}\")\n",
    "                if approvals >= required_approvals:\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=max_rounds,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the disadvantage (including uniqueness evidence) for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Negative Case:\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The disadvantage and its uniqueness evidence have already been established above.\\n\"\n",
    "            \"Find the best and most plan- and disadvantage-specific evidence of a causal link supporting the disadvantage articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the plan, if enacted, would cause the impact described in the disadvantage. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the link between the plan and the disadvantage. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat normally ends on an evaluator verdict, but it can also stop because\n",
    "    # max_round was hit, leaving a tool call/result or a rejected card as the final\n",
    "    # message. Walk the history backwards and prefer the newest approved verdict.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        content = message.get(\"content\")\n",
    "        if not isinstance(content, str):\n",
    "            continue\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            continue\n",
    "        cards = parsed.get(\"cards\") if isinstance(parsed, dict) else None\n",
    "        if not isinstance(cards, list) or not cards or not isinstance(cards[0], dict):\n",
    "            continue\n",
    "        candidate = cards[0]\n",
    "        if \"id\" not in candidate or \"include_in_case\" not in candidate:\n",
    "            continue  # not an evaluator verdict (e.g. raw search results)\n",
    "        if candidate[\"include_in_case\"] == \"include_it\":\n",
    "            card_json = candidate\n",
    "            break\n",
    "        if card_json is None:\n",
    "            card_json = candidate  # newest rejected verdict, kept only as a last resort\n",
    "    if card_json is None:\n",
    "        raise ValueError(\"No parsable debate_eval_agent verdict found in the disadvantage link chat history.\")\n",
    "\n",
    "    disadvantage_link_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    # Fetch the stored card so the case shows its original markup, not the LLM's summary.\n",
    "    disadvantage_link_doc = get_document_by_id(disadvantage_link_id)\n",
    "    card_markup = str(disadvantage_link_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Disadvantage Link</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_disadvantage_link_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Disadvantage Internal Link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_disadvantage_internal_link_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"Find one internal-link card for the disadvantage and append it to the negative case.\n",
    "\n",
    "    Runs a group chat of four agents (query planner, search caller, tool executor\n",
    "    and structured-output evaluator) that searches the debate evidence dataset via\n",
    "    search_debate_cards, then appends the chosen card's retagged argument and its\n",
    "    stored markup under a 'Disadvantage Internal Link' heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Case context; interpolated at the top of the opening prompt.\n",
    "        negative_case_html: HTML of the negative case built so far; also sent to the\n",
    "            agents as context.\n",
    "\n",
    "    Returns:\n",
    "        negative_case_html with the 'Disadvantage Internal Link' section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no evaluator verdict can be parsed from the chat history.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Tag for this card as it would be read aloud in the round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        # Exactly one card per evaluator reply.\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Forces the search agent to always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Constrains the evaluator's reply to the DebateCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of a causal internal link for the disadvantage in a policy debate case. \"\n",
    "            \"The debate topic, plan, and disadvantage (including uniqueness and link evidence) have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and disadvantage (including uniqueness and link) into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the internal link: that the link (already established) will cause the impact described in the disadvantage (i.e., internal link evidence). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific internal link evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the internal link claim for the disadvantage.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the internal link claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the link, as proposed, will cause the impact described in the disadvantage, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the internal link between the link and the impact of the disadvantage, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most disadvantage- and plan-relevant internal link evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as internal link evidence supporting the disadvantage in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the internal link claim for the disadvantage)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the disadvantage's internal link, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the link, as proposed, will cause the impact described in the disadvantage)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the internal link for the disadvantage)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the disadvantage's internal link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly disadvantage- and plan-relevant for the internal link\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, disadvantage- and plan-specific internal link evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # debate_search_agent proposes the tool call; executor_agent actually runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    required_approvals = 3  # end the chat after this many evaluator replies containing 'include_it'\n",
    "    max_rounds = 40  # hard cap on group chat turns\n",
    "    approvals = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route turns: planner -> search -> executor -> evaluator -> search ...; None ends the chat.\"\"\"\n",
    "        nonlocal approvals\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # content may be None on non-text messages; treat that as 'no approval'.\n",
    "            if \"include_it\" in (messages[-1].get(\"content\") or \"\"):\n",
    "                approvals += 1\n",
    "                print(f\"iterations: {approvals}\")\n",
    "                if approvals >= required_approvals:\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=max_rounds,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the disadvantage (including uniqueness and link evidence) for context\n",
    "\n",
    "    # Add the negative case here\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The disadvantage, its uniqueness, and link evidence have already been established above.\\n\"\n",
    "            \"Find the best and most plan- and disadvantage-specific evidence of a causal internal link supporting the disadvantage articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the link (already established) will cause the impact described in the disadvantage. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the internal link between the link and the disadvantage's impact. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat normally ends on an evaluator verdict, but it can also stop because\n",
    "    # max_round was hit, leaving a tool call/result or a rejected card as the final\n",
    "    # message. Walk the history backwards and prefer the newest approved verdict.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        content = message.get(\"content\")\n",
    "        if not isinstance(content, str):\n",
    "            continue\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            continue\n",
    "        cards = parsed.get(\"cards\") if isinstance(parsed, dict) else None\n",
    "        if not isinstance(cards, list) or not cards or not isinstance(cards[0], dict):\n",
    "            continue\n",
    "        candidate = cards[0]\n",
    "        if \"id\" not in candidate or \"include_in_case\" not in candidate:\n",
    "            continue  # not an evaluator verdict (e.g. raw search results)\n",
    "        if candidate[\"include_in_case\"] == \"include_it\":\n",
    "            card_json = candidate\n",
    "            break\n",
    "        if card_json is None:\n",
    "            card_json = candidate  # newest rejected verdict, kept only as a last resort\n",
    "    if card_json is None:\n",
    "        raise ValueError(\"No parsable debate_eval_agent verdict found in the disadvantage internal link chat history.\")\n",
    "\n",
    "    disadvantage_internal_link_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    # Fetch the stored card so the case shows its original markup, not the LLM's summary.\n",
    "    disadvantage_internal_link_doc = get_document_by_id(disadvantage_internal_link_id)\n",
    "    card_markup = str(disadvantage_internal_link_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Disadvantage Internal Link</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_disadvantage_internal_link_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Disadvantage Impact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_disadvantage_impact_to_case(debate_case, negative_case_html):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of a disadvantage impact for a policy debate case. \"\n",
    "            \"The debate topic, plan, and disadvantage (including uniqueness and link evidence) have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and disadvantage (including uniqueness and link) into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the impact: that the internal link (already established) will cause the impact described in the disadvantage (i.e., impact evidence). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific impact evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the impact claim for the disadvantage.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the impact claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the internal link, as proposed, will cause the impact described in the disadvantage, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the impact of the disadvantage, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most disadvantage- and plan-relevant impact evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as impact evidence supporting the disadvantage in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the impact claim for the disadvantage)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the disadvantage's impact, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the internal link, as proposed, will cause the impact described in the disadvantage)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the impact for the disadvantage)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the disadvantage's impact chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly disadvantage- and plan-relevant for the impact\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, disadvantage- and plan-specific impact evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the disadvantage (including uniqueness and link evidence) for context\n",
    "\n",
    "    # Add the negative case here\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The disadvantage, its uniqueness, and link evidence have already been established above.\\n\"\n",
    "            \"Find the best and most plan- and disadvantage-specific evidence of a causal impact supporting the disadvantage articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the internal link (already established) will cause the impact described in the disadvantage. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the impact of the disadvantage. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    disadvantage_impact_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    disadvantage_impact_json = json.loads(disadvantage_impact_raw_string)\n",
    "    card_json = disadvantage_impact_json[\"cards\"][0]\n",
    "    disadvantage_impact_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    disadvantage_impact_doc = get_document_by_id(disadvantage_impact_id)\n",
    "    card_markup = str(disadvantage_impact_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Disadvantage Impact</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_disadvantage_impact_to_case(debate_case, negative_case_html)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Counterplan"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Counterplan Text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_counterplan_text_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Given a debate_case string and a negative_case_html string,\n",
    "    finds the best counterplan text (including all types: PICs, Advantage CPs, etc.) and appends only the counterplan text (not the card) to the negative_case_html using h2, div, and p tags.\n",
    "    The output includes the counterplan text as it would be read out loud in a debate round, but does not include the card itself.\n",
    "\n",
    "    A group chat cycles debate_search_agent -> executor_agent -> debate_eval_agent and stops\n",
    "    after the third approved (\"include_it\") card or when max_round is reached. The most\n",
    "    recent approved card in the chat history supplies the counterplan text.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Debate case text placed at the start of the opening chat message.\n",
    "        negative_case_html: Negative case HTML built so far; sent to the agents as context.\n",
    "\n",
    "    Returns:\n",
    "        The negative_case_html string with a \"Counterplan Text\" section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the chat history contains no approved card.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        counterplantext: str  # The counterplan text to be presented as read out loud in the debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent setup\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible counterplan text for a policy debate case. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Consider all possible types of counterplans, including but not limited to Plan-Inclusive Counterplans (PICs), Advantage Counterplans, Exclusionary Counterplans, Consult/Condition Counterplans, and any other strategic counterplan options. \"\n",
    "            \"2. Break down the plan, advantages, and disadvantages to identify the most strategic and competitive counterplan text. \"\n",
    "            \"3. Guide evidence collection and counterplan drafting by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target counterplan texts which are directly competitive with the plan and relevant to the debate case. \"\n",
    "            \"   - Using BM25 search to find relevant counterplan texts from a debate evidence database (cutoff year 2022).\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific counterplan texts. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding counterplan texts that are directly competitive and strategic.\\n\"\n",
    "            \"4. Evaluate counterplan quality for:\\n\"\n",
    "            \"   - Direct, explicit competition with the plan (must not merely be tangentially related or generic alternatives).\\n\"\n",
    "            \"   - Specificity: The counterplan text must clearly articulate what the counterplan does, and how it is distinct from the plan.\\n\"\n",
    "            \"   - Strategic value and theoretical legitimacy (PICs, Advantage CPs, etc. are all on the table).\\n\"\n",
    "            \"Reject any counterplan text that does not fully and directly compete with the plan or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most plan-relevant, and strategic counterplan text possible, ensuring that each selected counterplan is unique and not a duplicate of any previously included counterplan.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether a counterplan text meets the highest standards for inclusion as a competitive counterplan in policy debate. \"\n",
    "            \"For each counterplan text, meticulously scrutinize its:\\n\"\n",
    "            \"- Direct competition with the plan (must be a legitimate counterplan, not just a generic alternative)\\n\"\n",
    "            \"- Strategic value (must provide a unique and compelling strategic option, not just a restatement of the plan or a trivial alternative)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected counterplans, and must clearly articulate the counterplan's mechanism)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the counterplan's competitiveness and strategic value)\\n\\n\"\n",
    "            \"After evaluating the counterplan, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any counterplan that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any counterplan that duplicates already selected counterplans\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the counterplan's competitiveness\\n\"\n",
    "            \"4. Only approve counterplans that meet ALL evaluation criteria and are strictly plan-relevant and competitive\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, plan-specific, and competitive counterplan text, with absolutely no duplicate or generic counterplans, and that all included counterplans are retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    def _approved_card(content):\n",
    "        # Return the first card of a structured evaluation reply when it is marked\n",
    "        # \"include_it\"; return None for anything else (non-string content, non-JSON\n",
    "        # text, unexpected JSON shape, or a rejected card).\n",
    "        if not isinstance(content, str):\n",
    "            return None\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except ValueError:\n",
    "            return None\n",
    "        if not isinstance(parsed, dict):\n",
    "            return None\n",
    "        cards = parsed.get(\"cards\")\n",
    "        if not isinstance(cards, list) or not cards or not isinstance(cards[0], dict):\n",
    "            return None\n",
    "        if cards[0].get(\"include_in_case\") != \"include_it\":\n",
    "            return None\n",
    "        return cards[0]\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # Count a reply only when its card is actually approved. A plain substring\n",
    "            # test would also match a rejected card whose reason text mentions \"include_it\".\n",
    "            if _approved_card(messages[-1].get(\"content\")) is not None:\n",
    "                iterations += 1\n",
    "                if iterations >= 3:\n",
    "                    # Third approval: returning None ends the group chat.\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the disadvantage, for context\n",
    "    # Also include the current negative_case_html for additional context\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Current negative case:\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"Find the best, most plan-specific, competitive, and strategic counterplan text for the case articulated above. \"\n",
    "            \"Consider all types of counterplans, including Plan-Inclusive Counterplans (PICs), Advantage Counterplans, Exclusionary Counterplans, Consult/Condition Counterplans, and any other strategic options. \"\n",
    "            \"Only consider counterplan texts that directly and specifically compete with the plan and are relevant to the case. \"\n",
    "            \"Reject any counterplan text that is generic, tangential, or not relevant to the plan. \"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Take the most recent approved card. The chat can also end on max_round, in which\n",
    "    # case the final message may be a tool call, a search result, or a rejected card,\n",
    "    # so the last message cannot be assumed to hold the answer.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        card_json = _approved_card(message.get(\"content\"))\n",
    "        if card_json is not None:\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No approved counterplan text was produced before the group chat ended.\"\n",
    "        )\n",
    "    counterplan_id = card_json[\"id\"]\n",
    "    counterplan_text = card_json.get(\"counterplantext\", \"\")\n",
    "    # The card is looked up but its markup is not appended. If the lookup raises or the\n",
    "    # result has no 'markup' entry, the error surfaces here, before the HTML is modified.\n",
    "    counterplan_doc = get_document_by_id(counterplan_id)\n",
    "    card_markup = str(counterplan_doc['markup'])\n",
    "\n",
    "    # Append only the counterplan text to negative_case_html using h2, div, and p tags (do NOT append the card itself)\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Counterplan Text</h2>\"\n",
    "        f\"\\n<div><p>{counterplan_text}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_counterplan_text_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Counterplan Solvency"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_counterplan_solvency_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Find counterplan solvency evidence and append it to the negative case HTML.\n",
    "\n",
    "    A group chat cycles debate_search_agent -> executor_agent -> debate_eval_agent and stops\n",
    "    after the third approved (\"include_it\") card or when max_round is reached. The most\n",
    "    recent approved card in the chat history is fetched with get_document_by_id, and its\n",
    "    retagged argument and markup are appended under a \"Counterplan Solvency\" heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Debate case text placed at the start of the opening chat message.\n",
    "        negative_case_html: Negative case HTML built so far; sent to the agents as context.\n",
    "\n",
    "    Returns:\n",
    "        The negative_case_html string with the counterplan solvency section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the chat history contains no approved card.\n",
    "    \"\"\"\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the counterplan text in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of counterplan solvency in a policy debate case. \"\n",
    "            \"The debate topic, plan, disadvantage, and counterplan have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan, counterplan, and the relevant advantage/disadvantage into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the counterplan, if enacted, would solve the advantage(s) or mitigate the impact(s) claimed by the affirmative plan. \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific counterplan solvency evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the counterplan's ability to solve the relevant advantage(s) or mitigate the impact(s).\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the counterplan's solvency (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the counterplan, as proposed, will solve the advantage(s) or mitigate the impact(s) in question, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the counterplan's solvency, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most counterplan- and advantage/disadvantage-relevant solvency evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as counterplan solvency evidence in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the counterplan's ability to solve the advantage(s) or mitigate the impact(s))\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the counterplan's solvency, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the counterplan, as proposed, will solve the advantage(s) or mitigate the impact(s) in question)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the counterplan's solvency)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the counterplan's solvency chain\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly counterplan- and advantage/disadvantage-relevant for solvency\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, counterplan- and advantage/disadvantage-specific solvency evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    def _approved_card(content):\n",
    "        # Return the first card of a structured evaluation reply when it is marked\n",
    "        # \"include_it\"; return None for anything else (non-string content, non-JSON\n",
    "        # text, unexpected JSON shape, or a rejected card).\n",
    "        if not isinstance(content, str):\n",
    "            return None\n",
    "        try:\n",
    "            parsed = json.loads(content)\n",
    "        except ValueError:\n",
    "            return None\n",
    "        if not isinstance(parsed, dict):\n",
    "            return None\n",
    "        cards = parsed.get(\"cards\")\n",
    "        if not isinstance(cards, list) or not cards or not isinstance(cards[0], dict):\n",
    "            return None\n",
    "        if cards[0].get(\"include_in_case\") != \"include_it\":\n",
    "            return None\n",
    "        return cards[0]\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            # Count a reply only when its card is actually approved. A plain substring\n",
    "            # test would also match a rejected card whose reason text mentions \"include_it\".\n",
    "            if _approved_card(messages[-1].get(\"content\")) is not None:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    # Third approval: returning None ends the group chat.\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, the disadvantage, and the counterplan for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The counterplan has already been established above.\\n\"\n",
    "            \"Find the best and most counterplan- and advantage/disadvantage-specific evidence of counterplan solvency articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the counterplan, if enacted, would solve the advantage(s) or mitigate the impact(s) described in the case. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the counterplan's solvency. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Take the most recent approved card. The chat can also end on max_round, in which\n",
    "    # case the final message may be a tool call, a search result, or a rejected card,\n",
    "    # so the last message cannot be assumed to hold the answer.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        card_json = _approved_card(message.get(\"content\"))\n",
    "        if card_json is not None:\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No approved counterplan solvency card was produced before the group chat ended.\"\n",
    "        )\n",
    "    counterplan_solvency_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    counterplan_solvency_doc = get_document_by_id(counterplan_solvency_id)\n",
    "    card_markup = str(counterplan_solvency_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Counterplan Solvency</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_counterplan_solvency_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Counterplan Net Benefit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_counterplan_net_benefit_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"Find one counterplan net-benefit card and append it to the negative case HTML.\n",
    "\n",
    "    Sets up a group chat of four agents (query planner, search caller, tool\n",
    "    executor, structured-output evaluator). The chat cycles search -> execute ->\n",
    "    evaluate until the evaluator has accepted a card three times or ``max_round``\n",
    "    is reached. The most recently accepted card is then loaded with\n",
    "    ``get_document_by_id`` and rendered under a \"Counterplan Net Benefit\" heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Text placed at the top of the opening prompt.\n",
    "        negative_case_html: HTML of the negative case so far. It is included in the\n",
    "            opening prompt and is the prefix of the returned string.\n",
    "\n",
    "    Returns:\n",
    "        ``negative_case_html`` with the new section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history contains an accepted card\n",
    "            (for example because ``max_round`` ran out first).\n",
    "    \"\"\"\n",
    "    # Schema the evaluator must answer with (passed as response_format below).\n",
    "    # Documented with comments only, so nothing is added to the model classes themselves.\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Tag rendered directly above the card markup in the Counterplan Net Benefit section\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Plain config: used by the planner, the tool executor and the chat manager.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # tool_choice=\"required\" makes the search agent always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # The evaluator answers in the DebateCardSearchResult schema.\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\",\n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of counterplan net benefits in a policy debate case. \"\n",
    "            \"The debate topic, plan, disadvantage, and counterplan have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan, counterplan, and the relevant advantage/disadvantage into their key components and causal relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates that the counterplan is net beneficial compared to the plan (e.g., avoids a disadvantage, achieves a unique benefit, or is preferable to the plan for a specific reason). \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific counterplan net benefit evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the counterplan's net benefit over the plan.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the counterplan's net benefit (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the counterplan, as proposed, is preferable to the plan, and explain the mechanism by which this occurs (e.g., avoids a disadvantage, achieves a unique benefit, etc.).\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the counterplan's net benefit, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most counterplan- and advantage/disadvantage-relevant net benefit evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as counterplan net benefit evidence in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly support the counterplan's net benefit over the plan, such as avoiding a disadvantage, achieving a unique benefit, or being preferable for a specific reason)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the counterplan's net benefit, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the counterplan, as proposed, is preferable to the plan in a specific, debate-relevant way)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the counterplan's net benefit)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the counterplan's net benefit chain\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly counterplan- and advantage/disadvantage-relevant for net benefit\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, counterplan- and advantage/disadvantage-specific net benefit evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    def _accepted_card(content):\n",
    "        \"\"\"Return the first card of an evaluator reply if it was accepted, else None.\"\"\"\n",
    "        # Tool calls and similar messages may carry no text content at all.\n",
    "        if not isinstance(content, str):\n",
    "            return None\n",
    "        try:\n",
    "            payload = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            return None\n",
    "        if not isinstance(payload, dict):\n",
    "            return None\n",
    "        cards = payload.get(\"cards\")\n",
    "        if not isinstance(cards, list) or not cards:\n",
    "            return None\n",
    "        first_card = cards[0]\n",
    "        # Check the verdict field itself; a substring test on the raw text would also\n",
    "        # match \"include_it\" appearing inside the free-text reason.\n",
    "        if isinstance(first_card, dict) and first_card.get(\"include_in_case\") == \"include_it\":\n",
    "            return first_card\n",
    "        return None\n",
    "\n",
    "    required_acceptances = 3  # stop once the evaluator has accepted this many cards\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route the chat: planner -> search -> executor -> evaluator -> search ...\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "\n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if _accepted_card(messages[-1].get(\"content\")) is not None:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= required_acceptances:\n",
    "                    # Returning None ends the group chat.\n",
    "                    return None\n",
    "            # Accepted (but not enough yet) or rejected: search again either way.\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, the disadvantage, and the counterplan for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The counterplan has already been established above.\\n\"\n",
    "            \"Find the best and most counterplan- and advantage/disadvantage-specific evidence of counterplan net benefit articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the counterplan, if enacted, is net beneficial compared to the plan (e.g., avoids a disadvantage, achieves a unique benefit, or is preferable to the plan for a specific reason). \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the counterplan's net benefit. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat can also end because max_round ran out, so the last message is not\n",
    "    # guaranteed to be an evaluator verdict. Walk the history backwards and take the\n",
    "    # newest accepted card instead of blindly parsing the final message.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        card_json = _accepted_card(message.get(\"content\"))\n",
    "        if card_json is not None:\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No accepted counterplan net benefit card was found in the chat history; rerun the cell to search again.\"\n",
    "        )\n",
    "\n",
    "    counterplan_net_benefit_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    counterplan_net_benefit_doc = get_document_by_id(counterplan_net_benefit_id)\n",
    "    card_markup = str(counterplan_net_benefit_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Counterplan Net Benefit</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_counterplan_net_benefit_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Kritik"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# kritik_title = negative_case['kritiks'][0]['title']\n",
    "# kritik_core_argument = negative_case['kritiks'][0]['core_argument']\n",
    "# kritik_alternative_text = negative_case['kritiks'][0].get('alternative_text', '')\n",
    "# negative_case_html += (\n",
    "#     f\"<h2>{kritik_title}</h2>\\n\"\n",
    "#     f\"<p>{kritik_core_argument}</p>\\n\"\n",
    "#     f\"<p><strong>Thus the Alternative:</strong> {kritik_alternative_text}</p>\"\n",
    "# )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Kritik Link"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_kritik_link_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"Find one kritik link card and append it to the negative case HTML.\n",
    "\n",
    "    Sets up a group chat of four agents (query planner, search caller, tool\n",
    "    executor, structured-output evaluator). The chat cycles search -> execute ->\n",
    "    evaluate until the evaluator has accepted a card three times or ``max_round``\n",
    "    is reached. The most recently accepted card is then loaded with\n",
    "    ``get_document_by_id`` and rendered under a \"Kritik Link\" heading.\n",
    "\n",
    "    Args:\n",
    "        debate_case: Text placed at the top of the opening prompt.\n",
    "        negative_case_html: HTML of the negative case so far. It is included in the\n",
    "            opening prompt and is the prefix of the returned string.\n",
    "\n",
    "    Returns:\n",
    "        ``negative_case_html`` with the new section appended.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no message in the chat history contains an accepted card\n",
    "            (for example because ``max_round`` ran out first).\n",
    "    \"\"\"\n",
    "    # Schema the evaluator must answer with (passed as response_format below).\n",
    "    # Documented with comments only, so nothing is added to the model classes themselves.\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Tag rendered directly above the card markup in the Kritik Link section\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    # Plain config: used by the planner, the tool executor and the chat manager.\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # tool_choice=\"required\" makes the search agent always answer with a tool call.\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # The evaluator answers in the DebateCardSearchResult schema.\n",
    "    kritik_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"kritik_argument_evaluator\",\n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of a kritik link in a policy debate case. \"\n",
    "            \"The debate topic, plan, kritik core argument, and alternative have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the plan and kritik (including the core argument and alternative) into their key components and relationships.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the kritik's link—i.e., how the plan or its underlying assumptions/representations/epistemology/ontology/etc. cause or reproduce the harms or logic critiqued by the kritik. \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific kritik link evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the kritik's link claim.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the kritik link claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish that the plan, as proposed, or its underlying logic, causes or perpetuates the harms or logic critiqued by the kritik, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Authoritativeness and theoretical sophistication.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the kritik link, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most kritik- and plan-relevant link evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_eval_agent = ConversableAgent(\n",
    "        name=\"kritik_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and kritik argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as link evidence supporting the kritik in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources in critical theory, philosophy, or the relevant field)\\n\"\n",
    "            \"- Theoretical sophistication and relevance (must precisely and explicitly support the kritik's link claim)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the kritik's link, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish that the plan, as proposed, or its underlying logic, causes or perpetuates the harms or logic critiqued by the kritik)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the kritik link)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the kritik's link chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly kritik- and plan-relevant for the link\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, kritik- and plan-specific link evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=kritik_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_search_agent = ConversableAgent(\n",
    "        name=\"kritik_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The search agent proposes the tool call; the executor agent runs it.\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=kritik_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    def _accepted_card(content):\n",
    "        \"\"\"Return the first card of an evaluator reply if it was accepted, else None.\"\"\"\n",
    "        # Tool calls and similar messages may carry no text content at all.\n",
    "        if not isinstance(content, str):\n",
    "            return None\n",
    "        try:\n",
    "            payload = json.loads(content)\n",
    "        except json.JSONDecodeError:\n",
    "            return None\n",
    "        if not isinstance(payload, dict):\n",
    "            return None\n",
    "        cards = payload.get(\"cards\")\n",
    "        if not isinstance(cards, list) or not cards:\n",
    "            return None\n",
    "        first_card = cards[0]\n",
    "        # Check the verdict field itself; a substring test on the raw text would also\n",
    "        # match \"include_it\" appearing inside the free-text reason.\n",
    "        if isinstance(first_card, dict) and first_card.get(\"include_in_case\") == \"include_it\":\n",
    "            return first_card\n",
    "        return None\n",
    "\n",
    "    required_acceptances = 3  # stop once the evaluator has accepted this many cards\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route the chat: planner -> search -> executor -> evaluator -> search ...\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is kritik_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return kritik_eval_agent\n",
    "\n",
    "        if last_speaker is kritik_eval_agent:\n",
    "            if _accepted_card(messages[-1].get(\"content\")) is not None:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= required_acceptances:\n",
    "                    # Returning None ends the group chat.\n",
    "                    return None\n",
    "            # Accepted (but not enough yet) or rejected: search again either way.\n",
    "            return kritik_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return kritik_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, kritik_search_agent, executor_agent, kritik_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the kritik (core argument and alternative) for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Current negative case so far:\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The kritik core argument and alternative have already been established above.\\n\"\n",
    "            \"Find the best and most plan- and kritik-specific evidence of a causal link supporting the kritik articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim that the plan, if enacted, or its underlying logic, causes or perpetuates the harms or logic critiqued by the kritik. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the link between the plan and the kritik. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The chat can also end because max_round ran out, so the last message is not\n",
    "    # guaranteed to be an evaluator verdict. Walk the history backwards and take the\n",
    "    # newest accepted card instead of blindly parsing the final message.\n",
    "    card_json = None\n",
    "    for message in reversed(chat_result.chat_history):\n",
    "        card_json = _accepted_card(message.get(\"content\"))\n",
    "        if card_json is not None:\n",
    "            break\n",
    "    if card_json is None:\n",
    "        raise ValueError(\n",
    "            \"No accepted kritik link card was found in the chat history; rerun the cell to search again.\"\n",
    "        )\n",
    "\n",
    "    kritik_link_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    kritik_link_doc = get_document_by_id(kritik_link_id)\n",
    "    card_markup = str(kritik_link_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Kritik Link</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_kritik_link_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Kritik Impact"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_kritik_impact_to_case(debate_case, negative_case_html):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    kritik_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"kritik_impact_argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence of a kritik impact in a policy debate case. \"\n",
    "            \"The debate topic, plan, kritik core argument, and alternative have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the kritik (including the core argument and alternative) into its key components and relationships, focusing on the ultimate impact or consequence of the kritik.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically demonstrates the kritik's impact—i.e., the ultimate harms, consequences, or theoretical implications established by the kritik. \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific kritik impact evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the kritik's impact claim.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the kritik impact claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish the ultimate harms, consequences, or theoretical implications of the kritik, and explain the mechanism by which these occur.\\n\"\n",
    "            \"   - Authoritativeness and theoretical sophistication.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the kritik impact, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most kritik-relevant impact evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_eval_agent = ConversableAgent(\n",
    "        name=\"kritik_impact_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and kritik argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as impact evidence supporting the kritik in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources in critical theory, philosophy, or the relevant field)\\n\"\n",
    "            \"- Theoretical sophistication and relevance (must precisely and explicitly support the kritik's impact claim)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the kritik's impact, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the ultimate harms, consequences, or theoretical implications of the kritik)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the kritik impact)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the kritik's impact chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly kritik-relevant for the impact\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, kritik-specific impact evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=kritik_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_search_agent = ConversableAgent(\n",
    "        name=\"kritik_impact_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=kritik_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is kritik_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return kritik_eval_agent\n",
    "        \n",
    "        if last_speaker is kritik_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return kritik_search_agent\n",
    "            else:\n",
    "                return kritik_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return kritik_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, kritik_search_agent, executor_agent, kritik_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the kritik (core argument and alternative) for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Current negative_case_html:\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The kritik core argument and alternative have already been established above.\\n\"\n",
    "            \"Find the best and most kritik-specific evidence of the ultimate impact or consequence supporting the kritik articulated above. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim about the harms, consequences, or theoretical implications established by the kritik. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the impact of the kritik. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    kritik_impact_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    kritik_impact_json = json.loads(kritik_impact_raw_string)\n",
    "    card_json = kritik_impact_json[\"cards\"][0]\n",
    "    kritik_impact_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    kritik_impact_doc = get_document_by_id(kritik_impact_id)\n",
    "    card_markup = str(kritik_impact_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Kritik Impact</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_kritik_impact_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Kritik Role of the Ballot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_kritik_role_of_ballot_to_case(debate_case, negative_case_html):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    kritik_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"kritik_role_of_ballot_argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on finding the best possible evidence for a kritik 'role of the ballot' argument in a policy debate case. \"\n",
    "            \"The debate topic, plan, kritik core argument, and alternative have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Break down the kritik (including the core argument and alternative) into its key components and relationships, focusing on the 'role of the ballot'—i.e., what the judge's ballot should endorse or reject, and why.\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically establishes or justifies the kritik's proposed role of the ballot. \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific 'role of the ballot' evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly supports the kritik's role of the ballot claim.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit support for the kritik's role of the ballot claim (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must establish what the judge's ballot should do in the context of the kritik, and explain the theoretical or strategic justification for that role.\\n\"\n",
    "            \"   - Authoritativeness and theoretical sophistication.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly support the kritik's role of the ballot, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most kritik-relevant 'role of the ballot' evidence possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_eval_agent = ConversableAgent(\n",
    "        name=\"kritik_role_of_ballot_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and kritik argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as 'role of the ballot' evidence supporting the kritik in policy debate. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources in critical theory, philosophy, or the relevant field)\\n\"\n",
    "            \"- Theoretical sophistication and relevance (must precisely and explicitly support the kritik's role of the ballot claim)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling support for the kritik's role of the ballot, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must establish the theoretical or strategic justification for the kritik's role of the ballot)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the kritik's role of the ballot)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the kritik's role of the ballot chains\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly kritik-relevant for the role of the ballot\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, kritik-specific 'role of the ballot' evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=kritik_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    kritik_search_agent = ConversableAgent(\n",
    "        name=\"kritik_role_of_ballot_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=kritik_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is kritik_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return kritik_eval_agent\n",
    "        \n",
    "        if last_speaker is kritik_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return kritik_search_agent\n",
    "            else:\n",
    "                return kritik_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return kritik_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, kritik_search_agent, executor_agent, kritik_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the kritik (core argument and alternative) for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The kritik core argument and alternative have already been established above.\\n\"\n",
    "            \"Find the best and most kritik-specific evidence justifying the kritik's proposed role of the ballot. \"\n",
    "            \"Only consider evidence that directly and specifically supports the claim about what the judge's ballot should endorse or reject, and the theoretical or strategic justification for that role, as established by the kritik. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the kritik's role of the ballot. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    kritik_role_of_ballot_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    kritik_role_of_ballot_json = json.loads(kritik_role_of_ballot_raw_string)\n",
    "    card_json = kritik_role_of_ballot_json[\"cards\"][0]\n",
    "    kritik_role_of_ballot_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    kritik_role_of_ballot_doc = get_document_by_id(kritik_role_of_ballot_id)\n",
    "    card_markup = str(kritik_role_of_ballot_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>Kritik Role of the Ballot</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# negative_case_html = add_kritik_role_of_ballot_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## On Case Rebuttals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_on_case_rebuttal_to_case(debate_case, negative_case_html):\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str  # Argument to be presented as the first card after the plantext in a debate round\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=1)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    argument_evaluator = ConversableAgent(\n",
    "        name=\"argument_evaluator\", \n",
    "        system_message=(\n",
    "            \"You are an expert policy debater focused on attacking the affirmative's case ON-CASE. \"\n",
    "            \"The debate topic, plan, and the full affirmative case (including all evidence and tags) have already been provided. \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Identify a single, specific piece of evidence (card) from the affirmative's case that is most strategic to attack (e.g., a key internal link, impact, or advantage card). \"\n",
    "            \"   - IMPORTANT: Clearly signpost and indicate the specific part of the case and the specific name (tag or cite) of the affirmative evidence (card) you are refuting. \"\n",
    "            \"   - Do NOT select a card that has already been refuted by any previous on-case rebuttal (if an on-case rebuttal to that card already exists in the negative_case_html, pick a different card).\\n\"\n",
    "            \"2. Guide evidence collection by:\\n\"\n",
    "            \"   - Formulating extremely precise search queries that target only evidence which directly and specifically turns, answers, or provides defense against that specific affirmative card. \"\n",
    "            \"   - Using BM25 search to find relevant cards from a debate evidence database.\\n\"\n",
    "            \"   - If you are being called after previous searches, you must significantly modify and refine your BM25 search queries to maximize the chance of finding new, more relevant, or more specific on-case rebuttal evidence. Do not simply repeat or slightly alter previous queries—make substantial changes to your search approach, keywords, or focus.\\n\"\n",
    "            \"   - Suggest query refinements to maximize the chance of finding evidence that directly answers or turns the targeted affirmative card.\\n\"\n",
    "            \"3. Evaluate evidence quality for:\\n\"\n",
    "            \"   - Direct, explicit clash with the targeted affirmative card (evidence must not merely be tangentially related or generic background).\\n\"\n",
    "            \"   - Specificity: The evidence must directly answer, turn, or provide defense against the specific claim or warrant in the targeted affirmative card, and explain the mechanism by which this occurs.\\n\"\n",
    "            \"   - Empirical support and authoritativeness.\\n\"\n",
    "            \"Reject any evidence that does not fully and directly answer, turn, or defend against the targeted affirmative card, or that could be interpreted as generic or non-specific. \"\n",
    "            \"Your goal is to find the strictest, most affirmative-evidence-specific on-case rebuttal possible, ensuring that each selected card is unique and not a duplicate of any previously included evidence. \"\n",
    "            \"In your output, always clearly signpost and indicate the specific part of the case and the specific name (tag or cite) of the affirmative evidence being refuted.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    debate_eval_agent = ConversableAgent(\n",
    "        name=\"debate_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are an extremely selective and rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether evidence meets the highest standards for inclusion as ON-CASE rebuttal evidence against the affirmative's case. \"\n",
    "            \"For each piece of evidence, meticulously scrutinize its:\\n\"\n",
    "            \"- Author qualifications (must be from recognized experts or authoritative sources)\\n\"\n",
    "            \"- Empirical basis (must be supported by concrete data and research)\\n\"\n",
    "            \"- Direct relevance (must precisely and explicitly answer, turn, or defend against the targeted affirmative card)\\n\"\n",
    "            \"- Strategic value (must provide unique and compelling clash with the affirmative's evidence, not just generic background)\\n\"\n",
    "            \"- Specificity (must not duplicate or closely overlap with other selected evidence, and must directly address the specific claim or warrant in the targeted affirmative card)\\n\"\n",
    "            \"- Wording precision (must use exact terminology needed to establish the on-case rebuttal)\\n\\n\"\n",
    "            \"After evaluating the evidence, you must:\\n\"\n",
    "            \"1. IMMEDIATELY REJECT (mark as 'False' and/or ignore) any evidence that has already been marked as 'include_it' in previous iterations\\n\"\n",
    "            \"2. Reject any evidence that duplicates already selected cards\\n\"\n",
    "            \"3. Ensure terminology precisely matches what's needed for the on-case rebuttal\\n\"\n",
    "            \"4. Only approve evidence that meets ALL evaluation criteria and is strictly on-case and affirmative-evidence-specific\\n\\n\"\n",
    "            \"Your goal is to ensure we have the highest quality, most specific on-case rebuttal evidence, with absolutely no duplicate or generic cards, and that all included evidence is retagged and recut with precise, policy debate-style markup.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        if len(messages) <= 1:\n",
    "            return argument_evaluator\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return debate_eval_agent\n",
    "        \n",
    "        if last_speaker is debate_eval_agent:\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                print(f\"iterations: {iterations}\")\n",
    "                if iterations >= 3:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is argument_evaluator:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[argument_evaluator, debate_search_agent, executor_agent, debate_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case, which includes the debate topic, the plan, and the full affirmative case (including all evidence and tags) for context\n",
    "\n",
    "    chat_result = argument_evaluator.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Current negative case rebuttals (for context, do not repeat cards already refuted):\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"The full affirmative case, including all evidence and tags, has already been established above.\\n\"\n",
    "            \"Identify a single, specific piece of affirmative evidence (card) that is most strategic to attack. \"\n",
    "            \"You must clearly signpost and indicate the specific part of the case and the specific name (tag or cite) of the affirmative evidence (card) you are refuting. \"\n",
    "            \"Do NOT select a card that has already been refuted by any previous on-case rebuttal (if an on-case rebuttal to that card already exists in the negative_case_html, pick a different card). \"\n",
    "            \"Find the best and most specific evidence that directly answers, turns, or provides defense against that specific affirmative card. \"\n",
    "            \"Only consider evidence that directly and specifically clashes with the claim, warrant, or impact of the targeted affirmative card. \"\n",
    "            \"Reject any evidence that is generic, tangential, or not relevant to the specific on-case rebuttal. \"\n",
    "            # No recency or cutoff requirements; old evidence is acceptable if it is high quality.\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    on_case_rebuttal_raw_string = chat_result.chat_history[-1][\"content\"]\n",
    "    on_case_rebuttal_json = json.loads(on_case_rebuttal_raw_string)\n",
    "    card_json = on_case_rebuttal_json[\"cards\"][0]\n",
    "    on_case_rebuttal_id = card_json[\"id\"]\n",
    "    retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "\n",
    "    on_case_rebuttal_doc = get_document_by_id(on_case_rebuttal_id)\n",
    "    card_markup = str(on_case_rebuttal_doc['markup'])\n",
    "\n",
    "    # Append to negative_case_html using h2, div, and p tags\n",
    "    negative_case_html += (\n",
    "        f\"\\n<h2>On-Case Rebuttal</h2>\"\n",
    "        f\"\\n<div><p>{retagged_argument}</p></div>\"\n",
    "        f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "    )\n",
    "    return negative_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for _ in range(1):\n",
    "#     negative_case_html = add_on_case_rebuttal_to_case(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1NC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def try_func(func, *args, **kwargs):\n",
    "    last_exception = None\n",
    "    for attempt in range(3):\n",
    "        try:\n",
    "            return func(*args, **kwargs)\n",
    "        except Exception as e:\n",
    "            last_exception = e\n",
    "            if attempt < 2:\n",
    "                continue\n",
    "            else:\n",
    "                raise\n",
    "    raise last_exception\n",
    "\n",
    "negative_case = try_func(generate_negative_offcase, debate_case)\n",
    "\n",
    "topicality = negative_case.get('topicality', {})\n",
    "topicality_title = topicality.get('title')\n",
    "topicality_core_argument = topicality.get('core_argument_summary_as_spoken_outloud_in_debate_round')\n",
    "\n",
    "if topicality_title and topicality_core_argument:\n",
    "    negative_case_html = f\"<h2>{topicality_title}</h2>\\n<p>{topicality_core_argument}</p>\"\n",
    "    negative_case_html = try_func(add_topicality_interpretation_and_evidence, debate_case, negative_case_html)\n",
    "    negative_case_html = try_func(add_topicality_violation, debate_case, negative_case_html)\n",
    "    negative_case_html = try_func(add_topicality_reasons_to_prefer_and_evidence, debate_case, negative_case_html)\n",
    "\n",
    "\n",
    "theory = negative_case.get('theory', {})\n",
    "theory_title = theory.get('title')\n",
    "theory_core_argument = theory.get('core_argument_summary_as_spoken_outloud_in_debate_round')\n",
    "\n",
    "if theory_title and theory_core_argument:\n",
    "    negative_case_html += f\"<h2>{theory_title}</h2>\\n<p>{theory_core_argument}</p>\"\n",
    "    negative_case_html = try_func(add_theory_interpretation_and_evidence, debate_case, negative_case_html)\n",
    "    negative_case_html = try_func(add_theory_violation_and_grounding_evidence, debate_case, negative_case_html)\n",
    "    negative_case_html = try_func(add_theory_reasons_to_prefer_and_evidence, debate_case, negative_case_html)\n",
    "\n",
    "\n",
    "disadvantage_title = negative_case['disadvantages'][0]['title']\n",
    "disadvantage_core_argument = negative_case['disadvantages'][0]['core_argument_summary_as_spoken_outloud_in_debate_round']\n",
    "negative_case_html += f\"<h2>{disadvantage_title}</h2>\\n<p>{disadvantage_core_argument}</p>\"\n",
    "negative_case_html = try_func(add_disadvantage_uniqueness_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_disadvantage_link_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_disadvantage_internal_link_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_disadvantage_impact_to_case, debate_case, negative_case_html)\n",
    "\n",
    "\n",
    "negative_case_html = try_func(add_counterplan_text_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_counterplan_solvency_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_counterplan_net_benefit_to_case, debate_case, negative_case_html)\n",
    "\n",
    "kritik_title = negative_case['kritiks'][0]['title']\n",
    "kritik_core_argument = negative_case['kritiks'][0]['core_argument_summary_as_spoken_outloud_in_debate_round']\n",
    "kritik_alternative_text = negative_case['kritiks'][0].get('alternative_text', '')\n",
    "negative_case_html += (\n",
    "    f\"<h2>{kritik_title}</h2>\\n\"\n",
    "    f\"<p>{kritik_core_argument}</p>\\n\"\n",
    "    f\"<p><strong>Thus the Alternative:</strong> {kritik_alternative_text}</p>\"\n",
    ")\n",
    "\n",
    "negative_case_html = try_func(add_kritik_link_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_kritik_impact_to_case, debate_case, negative_case_html)\n",
    "negative_case_html = try_func(add_kritik_role_of_ballot_to_case, debate_case, negative_case_html)\n",
    "\n",
    "for _ in range(3):\n",
    "    negative_case_html = try_func(add_on_case_rebuttal_to_case, debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(negative_case_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "negative_case_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cross Examination (of the 1NC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate_1ac_cross_examination_of_1nc(debate_case: str, negative_case_html: str) -> str:\n",
    "    \"\"\"\n",
    "    Simulate the affirmative's cross-examination of the 1NC.\n",
    "\n",
    "    Three agents share a group chat: the affirmative asks a question, the negative\n",
    "    answers, and after a fixed number of exchanges a summary agent emits the whole\n",
    "    cross-examination as structured JSON, which is then rendered to HTML.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The 1AC, interpolated verbatim into the kickoff prompt.\n",
    "        negative_case_html: The 1NC, interpolated verbatim into the kickoff prompt.\n",
    "\n",
    "    Returns:\n",
    "        An HTML fragment: an <h2> heading followed, for each exchange, by a question\n",
    "        <div>, a response <div> and a <br/>.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "        KeyError: If the parsed JSON lacks 'cross_ex' or a pair lacks one of its fields.\n",
    "    \"\"\"\n",
    "    from typing import List\n",
    "    from pydantic import BaseModel, Field\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Number of question/answer exchanges. Shared by the response schema and the turn\n",
    "    # counter so the summary agent is asked for exactly as many pairs as were exchanged.\n",
    "    # NOTE(review): the agent prompts below still say '3 to 7' questions while the\n",
    "    # schema and the loop both force exactly this many - confirm which one is intended.\n",
    "    num_exchanges = 7\n",
    "\n",
    "    # Define the structure for a cross-examination question and answer\n",
    "    class CrossExPair(BaseModel):\n",
    "        affirmative_question: str\n",
    "        negative_response: str\n",
    "\n",
    "    class CrossExamination(BaseModel):\n",
    "        cross_ex: List[CrossExPair] = Field(..., min_items=num_exchanges, max_items=num_exchanges)\n",
    "\n",
    "    # LLM config for the two debating agents and the group chat manager\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Same model, but the reply is constrained to the CrossExamination schema\n",
    "    cross_ex_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=CrossExamination,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent 1: Affirmative asks questions\n",
    "    affirmative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"affirmative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 1AC (affirmative) debater in a policy debate cross-examination. \"\n",
    "            \"Your job is to ask sharp, strategic, and challenging questions about the 1NC (negative case) just presented. \"\n",
    "            \"Focus on exposing weaknesses, ambiguities, or assumptions in the negative's theory, disadvantages, counterplans, and kritiks. \"\n",
    "            \"Ask one question at a time, and wait for the negative to answer before asking the next. \"\n",
    "            \"Do not answer your own questions. \"\n",
    "            \"Be concise and direct. \"\n",
    "            \"Do not repeat questions. \"\n",
    "            \"You will ask a total of 3 to 7 questions.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 2: Negative answers\n",
    "    negative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"negative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 1NC (negative) debater being cross-examined by the 1AC (affirmative) in a policy debate. \"\n",
    "            \"Your job is to answer each question as clearly, persuasively, and strategically as possible, defending the negative case. \"\n",
    "            \"Respond directly to the affirmative's question, but do not volunteer extra information. \"\n",
    "            \"Be concise and avoid rambling. \"\n",
    "            \"Do not ask questions yourself.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 3: Cross-ex summary agent (outputs the structured Q&A)\n",
    "    cross_ex_summary_agent = ConversableAgent(\n",
    "        name=\"cross_ex_summary_agent\",\n",
    "        system_message=(\n",
    "            \"You are a debate judge summarizing the 1AC's cross-examination of the 1NC. \"\n",
    "            \"Your job is to produce a structured list of question/answer pairs, each with an 'affirmative_question' and a 'negative_response', \"\n",
    "            \"covering the full cross-examination as it occurred. \"\n",
    "            \"Return the result as a list of 3 to 7 question/answer pairs, each clearly labeled.\"\n",
    "        ),\n",
    "        llm_config=cross_ex_llm_config,\n",
    "    )\n",
    "\n",
    "    cross_ex_iterations = 0  # Completed question/answer exchanges so far\n",
    "\n",
    "    def cross_ex_speaker_selection(last_speaker, groupchat):\n",
    "        nonlocal cross_ex_iterations\n",
    "        # The kickoff prompt is sent by negative_cross_ex_agent (see initiate_chat below),\n",
    "        # so the first selection sees that agent as last_speaker even though nothing has\n",
    "        # been answered yet. Route it straight to the affirmative without counting it;\n",
    "        # otherwise only num_exchanges - 1 real exchanges would take place.\n",
    "        if last_speaker is None or len(groupchat.messages) <= 1:\n",
    "            return affirmative_cross_ex_agent\n",
    "        if last_speaker is affirmative_cross_ex_agent:\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is negative_cross_ex_agent:\n",
    "            # A real answer was just given: one more exchange is complete.\n",
    "            cross_ex_iterations += 1\n",
    "            if cross_ex_iterations >= num_exchanges:\n",
    "                return cross_ex_summary_agent\n",
    "            return affirmative_cross_ex_agent\n",
    "        if last_speaker is cross_ex_summary_agent:\n",
    "            # No next speaker: the summary is the final message of the chat.\n",
    "            return None\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[affirmative_cross_ex_agent, negative_cross_ex_agent, cross_ex_summary_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=cross_ex_speaker_selection\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The context for the cross-examination is both the 1AC and the 1NC\n",
    "    chat_result = negative_cross_ex_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"You are about to begin the 1AC's cross-examination of the 1NC. \"\n",
    "            f\"The 1AC is as follows:\\n\\n{debate_case}\\n\\n\"\n",
    "            f\"The 1NC is as follows:\\n\\n{negative_case_html}\\n\\n\"\n",
    "            \"Begin by asking your first question.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The summary agent's output is the last message in the chat history\n",
    "    cross_ex_json = chat_result.chat_history[-1][\"content\"]\n",
    "    cross_ex_data = json.loads(cross_ex_json)\n",
    "    cross_ex_pairs = cross_ex_data[\"cross_ex\"]\n",
    "\n",
    "    # Format as HTML for display\n",
    "    html = \"<h2>1AC Cross-Examination of the 1NC</h2>\\n\"\n",
    "    for i, pair in enumerate(cross_ex_pairs, 1):\n",
    "        html += f\"<div><b>Affirmative Question {i}:</b> {pair['affirmative_question']}</div>\\n\"\n",
    "        html += f\"<div><b>Negative Response {i}:</b> {pair['negative_response']}</div>\\n\"\n",
    "        html += \"<br/>\\n\"\n",
    "\n",
    "    return html\n",
    "\n",
    "# Usage: one_ac_crossx = simulate_1ac_cross_examination_of_1nc(debate_case, negative_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the 1NC cross-examination with a bounded number of attempts. Any exception\n",
    "# raised by the simulation triggers a retry; the last failure is re-raised unchanged.\n",
    "attempts = 0\n",
    "max_attempts = 3\n",
    "while True:\n",
    "    try:\n",
    "        one_ac_crossx = simulate_1ac_cross_examination_of_1nc(debate_case, negative_case_html)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        attempts += 1\n",
    "        if attempts >= max_attempts:\n",
    "            raise\n",
    "        # Report why a retry is happening instead of swallowing the error silently.\n",
    "        print(f\"1NC cross-examination attempt {attempts}/{max_attempts} failed ({type(e).__name__}: {e}); retrying...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(one_ac_crossx))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the cross-examination to the 1NC context at most once, so that re-running\n",
    "# this cell does not duplicate the transcript in every later prompt.\n",
    "if one_ac_crossx not in negative_case_html:\n",
    "    negative_case_html = negative_case_html + one_ac_crossx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2AC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2AC: Gather Cards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2ac_evidence_to_case(debate_case, negative_case_html):\n",
    "    \"\"\"\n",
    "    Gather new evidence cards for the 2AC and render them as an HTML section.\n",
    "\n",
    "    A group chat loops search -> tool execution -> evaluation until the evaluator has\n",
    "    approved cards in max_iterations rounds. Only the evaluator's final message is\n",
    "    parsed, and only cards it marked 'include_it' are rendered. This function does NOT\n",
    "    write a 2AC speech; it only selects and presents cards.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative case (including the 1AC), placed at the top of the kickoff prompt.\n",
    "        negative_case_html: The 1NC, placed in the kickoff prompt after the affirmative case.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string: a <div class='two-ac-section'> holding an <h1>2AC</h1> heading and,\n",
    "        per approved card, a numbered heading, the retagged argument and the card markup.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "        KeyError: If the parsed JSON lacks 'cards' or a card lacks 'id'.\n",
    "    \"\"\"\n",
    "\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        # 'include_it' marks an approved card; the speaker selection below looks for this token.\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=7)\n",
    "\n",
    "    # Plain config: strategist, executor and group chat manager\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Search agent config: tool_choice='required' so every turn issues a tool call\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Evaluator config: reply constrained to the DebateCardSearchResult schema\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2AC Agent: Focused on gathering the most strategic, high-impact 2AC cards\n",
    "    two_ac_agent = ConversableAgent(\n",
    "        name=\"two_ac_agent\",\n",
    "        system_message=(\n",
    "            \"You are an expert affirmative policy debater preparing the 2AC. \"\n",
    "            \"You have access to the full debate_case (including the 1AC) and the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc). \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Carefully read and analyze all 1NC off-case positions (theory, disadvantages, counterplans, kritiks, etc) and the 1AC.\\n\"\n",
    "            \"2. Identify which 1NC arguments the 1AC is most vulnerable to—these are the arguments that, if left unanswered or insufficiently answered, would most likely cause the affirmative to lose the debate round.\\n\"\n",
    "            \"3. For each of these most threatening 1NC arguments, research and select the most strategic, high-quality, unique cards (evidence) that directly answer and refute those arguments, while also further entrenching and extending the 1AC's core claims. \"\n",
    "            \"Each card must be:\\n\"\n",
    "            \"- Directly responsive to a specific 1NC argument that poses a significant threat to the 1AC\\n\"\n",
    "            \"- Not duplicative of any previous 1AC or 1NC card (do NOT select any card that is already in the 1AC or 1NC)\\n\"\n",
    "            \"- Clearly marked with its cite and a retagged argument as it would be read outloud in the debate round\\n\"\n",
    "            \"- Accompanied by a reason to include it in the 2AC, specifically explaining how it helps the 2AC win the round against the most dangerous 1NC arguments\\n\"\n",
    "            \"- Most importantly, every card you select must support the 1AC and the affirmative position. Do not select any card that undermines or contradicts the affirmative case or the 1AC's core claims.\\n\"\n",
    "            \"Do NOT write a 2AC rebuttal speech. Only select and present the new 2AC cards with their tags, cites, and reasons to include.\\n\"\n",
    "            \"Prioritize quality and strategic value over quantity: select only as many cards as are necessary to decisively answer the 1NC's most dangerous arguments and secure a winning position for the 2AC.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2AC Evidence Evaluator: Ensures only the most strategic, responsive cards are included\n",
    "    two_ac_eval_agent = ConversableAgent(\n",
    "        name=\"two_ac_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether each piece of evidence proposed for the 2AC meets the highest standards for inclusion:\\n\"\n",
    "            \"- Is it directly responsive to a 1NC argument that the 1AC is most vulnerable to?\\n\"\n",
    "            \"- Is it unique (not duplicative of any 1AC or 1NC card—do NOT approve any card that is already in the 1AC or 1NC)?\\n\"\n",
    "            \"- Is it authoritative and empirically supported?\\n\"\n",
    "            \"- Is it strategically valuable for the 2AC, meaning it helps the 2AC win the round against the most dangerous 1NC arguments?\\n\"\n",
    "            \"- Is it clearly retagged and recut for 2AC use?\\n\"\n",
    "            \"- Most importantly, does it support the 1AC and the affirmative position? Reject any card that undermines or contradicts the affirmative case or the 1AC's core claims.\\n\"\n",
    "            \"Reject any card that does not meet all criteria. Only approve cards that are directly responsive to the 1NC's most threatening arguments, unique, strategically valuable for the 2AC, and affirm the 1AC. Do NOT approve any card that is already in the 1AC or 1NC.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # Search agent for evidence\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executes the tool calls proposed by the search agent\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0  # Evaluation rounds in which at least one card was approved\n",
    "    max_iterations = 3  # Allow for more cards if needed, but focus on quality over quantity\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # Only the kickoff prompt is present: let the 2AC strategist speak first.\n",
    "        if len(messages) <= 1:\n",
    "            return two_ac_agent\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return two_ac_eval_agent\n",
    "\n",
    "        if last_speaker is two_ac_eval_agent:\n",
    "            # Substring test on the raw reply: true when at least one card was approved.\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= max_iterations:\n",
    "                    return None\n",
    "                else:\n",
    "                    return debate_search_agent\n",
    "            else:\n",
    "                return debate_search_agent\n",
    "\n",
    "        if last_speaker is two_ac_agent:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_ac_agent, debate_search_agent, executor_agent, two_ac_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=60,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case (HTML, including 1AC) and negative_case_html (HTML, the 1NC) for context\n",
    "    chat_result = two_ac_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"{debate_case}\\n\\n\"\n",
    "            f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"You are the 2AC. Your job is to:\\n\"\n",
    "            \"- Extend all 1AC cards and arguments\\n\"\n",
    "            \"- Identify which 1NC arguments the 1AC is most vulnerable to and which are most likely to decide the round if left unanswered\\n\"\n",
    "            \"- For each of these most threatening 1NC arguments, research and present the most strategic, high-quality, unique cards that directly answer and refute them, while also further entrenching and extending the 1AC's core claims\\n\"\n",
    "            \"- For each card, provide its cite, a reason to include (explaining how it helps the 2AC win the round against the most dangerous 1NC arguments), and a retagged argument as it would be read outloud in the debate round\\n\"\n",
    "            \"- Most importantly, every card you select must support the 1AC and the affirmative position. Do not select any card that undermines or contradicts the affirmative case or the 1AC's core claims.\\n\"\n",
    "            \"- Do NOT select any card that is already in the 1AC or 1NC. All 2AC cards must be new and not previously used in the 1AC or 1NC.\\n\"\n",
    "            \"Do NOT write a 2AC rebuttal speech. Only select and present the new 2AC cards with their tags, cites, and reasons to include.\\n\"\n",
    "            \"Prioritize quality and strategic value over quantity: select only as many cards as are necessary to decisively answer the 1NC's most dangerous arguments and secure a winning position for the 2AC.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # Only the final chat message is parsed; it is expected to be the evaluator's JSON.\n",
    "    # NOTE(review): cards approved in earlier evaluation rounds are used only if the\n",
    "    # evaluator repeats them in its final reply - confirm that this is intended.\n",
    "    two_ac_result_raw = chat_result.chat_history[-1][\"content\"]\n",
    "    two_ac_result_json = json.loads(two_ac_result_raw)\n",
    "\n",
    "    # Honour the evaluator's verdict: cards it rejected (include_in_case == 'False')\n",
    "    # must not be rendered into the 2AC.\n",
    "    approved_cards = [\n",
    "        card_json for card_json in two_ac_result_json[\"cards\"]\n",
    "        if card_json.get(\"include_in_case\") == \"include_it\"\n",
    "    ]\n",
    "\n",
    "    # Build the 2AC HTML string\n",
    "    two_ac_html = \"<div class='two-ac-section'>\\n\"\n",
    "    two_ac_html += \"<h1>2AC</h1>\\n\"\n",
    "\n",
    "    # Add each approved 2AC card, numbered consecutively from 1\n",
    "    for card_number, card_json in enumerate(approved_cards, 1):\n",
    "        card_id = card_json[\"id\"]\n",
    "        retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "        # presumably looks the card up in the evidence index by its id - verify against its definition\n",
    "        card_doc = get_document_by_id(card_id)\n",
    "        card_markup = str(card_doc['markup'])\n",
    "        two_ac_html += (\n",
    "            f\"\\n<h2>2AC Card {card_number}</h2>\"\n",
    "            f\"\\n<div><p><strong></strong> {retagged_argument}</p></div>\"\n",
    "            f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "        )\n",
    "\n",
    "    two_ac_html += \"\\n</div>\"\n",
    "\n",
    "    return two_ac_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather the 2AC evidence with a bounded number of attempts. Any exception raised by\n",
    "# the gathering step triggers a retry; the last failure is re-raised unchanged.\n",
    "max_attempts = 3\n",
    "ac_case = None\n",
    "last_exception = None\n",
    "for attempt in range(max_attempts):\n",
    "    try:\n",
    "        ac_case = add_2ac_evidence_to_case(debate_case, negative_case_html)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        # Not read in this cell; retained so the most recent failure stays inspectable.\n",
    "        last_exception = e\n",
    "        if attempt == max_attempts - 1:\n",
    "            raise\n",
    "        # Report why a retry is happening instead of swallowing the error silently.\n",
    "        print(f\"2AC evidence attempt {attempt + 1}/{max_attempts} failed ({type(e).__name__}: {e}); retrying...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(ac_case))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2AC: Write Speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2ac_to_case(debate_case, negative_case_html, twoac_case_html):\n",
    "    \"\"\"\n",
    "    Generate a 2AC speech transcript and append it to the 2AC card section.\n",
    "\n",
    "    As currently wired, the coach agent only delivers the kickoff prompt and the drafter\n",
    "    writes a single draft; that draft is the transcript that gets appended.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The affirmative case (including the 1AC), placed at the top of the prompt.\n",
    "        negative_case_html: The 1NC, placed in the prompt after the affirmative case.\n",
    "        twoac_case_html: The 2AC card section; included in the prompt and used as the\n",
    "            prefix of the returned string.\n",
    "\n",
    "    Returns:\n",
    "        twoac_case_html followed by the transcript. The transcript is wrapped in a\n",
    "        <div class='two-ac-section'> container unless it already carries one.\n",
    "    \"\"\"\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2AC Drafter: Writes the 2AC speech\n",
    "    two_ac_drafter = ConversableAgent(\n",
    "        name=\"two_ac_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert affirmative policy debater preparing the 2AC speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC) and the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc). \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 2AC speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Extend all 1AC arguments and evidence\\n\"\n",
    "            \"- Directly answer and refute all of the 1NC arguments (off-case and on-case)\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the 2AC wins the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 2AC speech\\n\"\n",
    "            \"- Be extremely long, highly detailed, and complete—covering all major 1NC arguments and providing clear, specific, line-by-line answers\\n\"\n",
    "            \"- Be written as a transcript, as if the 2AC is being read aloud in a debate round\\n\"\n",
    "            \"- When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 1NC.\\n\"\n",
    "            \"Do NOT simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\\n\"\n",
    "            \"IMPORTANT: Output the 2AC speech transcript in HTML format, using <div class='two-ac-section'>, <h1>2AC Speech</h1>, and <div class='twoac-transcript'> as containers, and use <p>, <h2>, <h3>, <ul>, <li>, <b>, <strong>, <em>, <br/>, and other HTML tags as appropriate for structure and readability. Do NOT use <pre> or markdown formatting. The output should closely match the HTML style of the input documents.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2AC Coach: configured as a reviewer, but see the speaker selection below\n",
    "    two_ac_coach = ConversableAgent(\n",
    "        name=\"two_ac_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 2AC speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 2AC answer all the most important 1NC arguments?\\n\"\n",
    "            \"- Strategic focus: Did the 2AC collapse to the best ground and avoid spreading too thin?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 2AC?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: the coach's kickoff prompt is routed to the drafter, and the\n",
    "    # chat ends as soon as the drafter has replied.\n",
    "    # NOTE(review): the coach never gets a reviewing turn because the drafter branch\n",
    "    # returns None. If a draft -> review -> revise loop is wanted (max_round=4 would\n",
    "    # accommodate it), return two_ac_coach from that branch instead.\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # Defensive: no messages yet, so the drafter starts\n",
    "        if len(groupchat.messages) == 0:\n",
    "            return two_ac_drafter\n",
    "        # Drafter just wrote: stop here, the draft is the final message\n",
    "        if last_speaker is two_ac_drafter:\n",
    "            return None\n",
    "        # Coach just spoke (this includes the kickoff prompt): drafter writes\n",
    "        if last_speaker is two_ac_coach:\n",
    "            return two_ac_drafter\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_ac_drafter, two_ac_coach],\n",
    "        messages=[],\n",
    "        max_round=4,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 2AC drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 2AC speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"Cover all major 1NC arguments, extend the 1AC, and collapse strategically. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 1NC.\\n\"\n",
    "        \"IMPORTANT: Output the 2AC speech transcript in HTML format, using <div class='two-ac-section'>, <h1>2AC Speech</h1>, and <div class='twoac-transcript'> as containers, and use <p>, <h2>, <h3>, <ul>, <li>, <b>, <strong>, <em>, <br/>, and other HTML tags as appropriate for structure and readability. Do NOT use <pre> or markdown formatting. The output should closely match the HTML style of the input documents.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat; the coach acts as the initiator that delivers the prompt\n",
    "    chat_result = two_ac_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # The last message in the chat history is taken as the 2AC speech draft\n",
    "    transcript = chat_result.chat_history[-1][\"content\"]\n",
    "\n",
    "    # Avoid double-wrapping when the transcript already carries the outer container.\n",
    "    # The model may emit the class attribute with either quote style, so accept both.\n",
    "    wrapper_markers = (\"<div class='two-ac-section'>\", '<div class=\"two-ac-section\">')\n",
    "    if any(marker in transcript for marker in wrapper_markers):\n",
    "        twoac_case_html += \"\\n\" + transcript + \"\\n\"\n",
    "    else:\n",
    "        twoac_case_html += \"\\n<div class='two-ac-section'>\\n\"\n",
    "        twoac_case_html += \"<h1>2AC Speech</h1>\\n\"\n",
    "        twoac_case_html += f\"<div class='twoac-transcript'>{transcript}</div>\\n\"\n",
    "        twoac_case_html += \"</div>\\n\"\n",
    "\n",
    "    return twoac_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ac_case = add_2ac_to_case(debate_case, negative_case_html, ac_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(new_ac_case))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ac_case"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cross Examination (of the 2AC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate_1nc_cross_examination_of_2ac(\n",
    "    debate_case: str,\n",
    "    negative_case_html: str,\n",
    "    twoac_case_html: str\n",
    ") -> str:\n",
    "    \"\"\"\n",
    "    Simulates the cross-examination of the 2AC by the negative.\n",
    "\n",
    "    A negative agent and an affirmative agent alternate question/answer turns inside a\n",
    "    GroupChat. Once the required number of exchanges has happened, a summary agent emits\n",
    "    the exchange as structured JSON, which is then rendered to HTML.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The 1AC, interpolated into the opening prompt.\n",
    "        negative_case_html: The 1NC, interpolated into the opening prompt.\n",
    "        twoac_case_html: The 2AC, interpolated into the opening prompt.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string containing one question/response pair per exchange.\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "        KeyError: If the parsed JSON lacks the expected keys.\n",
    "    \"\"\"\n",
    "    from typing import List\n",
    "    from pydantic import BaseModel, Field\n",
    "\n",
    "    # Number of question/answer exchanges. Shared by the response schema and the speaker\n",
    "    # selection so the summary agent is asked for exactly as many pairs as occurred.\n",
    "    num_exchanges = 4\n",
    "\n",
    "    # Define the structure for a cross-examination question and answer\n",
    "    class CrossExPair(BaseModel):\n",
    "        negative_question: str\n",
    "        affirmative_response: str\n",
    "\n",
    "    class CrossExamination(BaseModel):\n",
    "        cross_ex: List[CrossExPair] = Field(..., min_items=num_exchanges, max_items=num_exchanges)\n",
    "\n",
    "    # LLM config for all agents\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Same model, but forced to answer in the CrossExamination schema\n",
    "    cross_ex_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=CrossExamination,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent 1: Negative asks questions\n",
    "    negative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"negative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 1NC (negative) debater in a policy debate cross-examination. \"\n",
    "            \"Your job is to ask sharp, strategic, and challenging questions about the 2AC (affirmative's second constructive) just presented. \"\n",
    "            \"Focus on exposing weaknesses, ambiguities, or assumptions in the affirmative's extensions, answers to the 1NC, and overall strategy. \"\n",
    "            \"Ask one question at a time, and wait for the affirmative to answer before asking the next. \"\n",
    "            \"Do not answer your own questions. \"\n",
    "            \"Be concise and direct. \"\n",
    "            \"Do not repeat questions. \"\n",
    "            \"You will ask a total of 3 to 7 questions.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 2: Affirmative answers\n",
    "    affirmative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"affirmative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 2AC (affirmative) debater being cross-examined by the 1NC (negative) in a policy debate. \"\n",
    "            \"Your job is to answer each question as clearly, persuasively, and strategically as possible, defending the affirmative case and the 2AC. \"\n",
    "            \"Respond directly to the negative's question, but do not volunteer extra information. \"\n",
    "            \"Be concise and avoid rambling. \"\n",
    "            \"Do not ask questions yourself.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 3: Cross-ex summary agent (outputs the structured Q&A)\n",
    "    cross_ex_summary_agent = ConversableAgent(\n",
    "        name=\"cross_ex_summary_agent\",\n",
    "        system_message=(\n",
    "            \"You are a debate judge summarizing the 1NC's cross-examination of the 2AC. \"\n",
    "            \"Your job is to produce a structured list of question/answer pairs, each with a 'negative_question' and an 'affirmative_response', \"\n",
    "            \"covering the full cross-examination as it occurred. \"\n",
    "            \"Return the result as a list of 3 to 7 question/answer pairs, each clearly labeled.\"\n",
    "        ),\n",
    "        llm_config=cross_ex_llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    cross_ex_iterations = 0  # Number of completed question/answer exchanges\n",
    "\n",
    "    def cross_ex_speaker_selection(last_speaker, groupchat):\n",
    "        \"\"\"Alternate negative question / affirmative answer, then hand off to the summary agent.\"\"\"\n",
    "        nonlocal cross_ex_iterations\n",
    "        if cross_ex_iterations == 0 and last_speaker is None:\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is negative_cross_ex_agent:\n",
    "            return affirmative_cross_ex_agent\n",
    "        if last_speaker is affirmative_cross_ex_agent:\n",
    "            # The affirmative agent initiates the chat, so the opening context message is\n",
    "            # attributed to it. That message is not an answer and must not be counted,\n",
    "            # otherwise the exchange ends one question short of what the schema requires.\n",
    "            if len(groupchat.messages) <= 1:\n",
    "                return negative_cross_ex_agent\n",
    "            cross_ex_iterations += 1\n",
    "            if cross_ex_iterations >= num_exchanges:\n",
    "                return cross_ex_summary_agent\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is cross_ex_summary_agent:\n",
    "            # Returning None ends the group chat after the summary.\n",
    "            return None\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[negative_cross_ex_agent, affirmative_cross_ex_agent, cross_ex_summary_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=cross_ex_speaker_selection\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The context for the cross-examination is the 1AC, 1NC, and 2AC\n",
    "    chat_result = affirmative_cross_ex_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"You are about to begin the 1NC's cross-examination of the 2AC. \"\n",
    "            f\"The 1AC is as follows:\\n\\n{debate_case}\\n\\n\"\n",
    "            f\"The 1NC is as follows:\\n\\n{negative_case_html}\\n\\n\"\n",
    "            f\"The 2AC is as follows:\\n\\n{twoac_case_html}\\n\\n\"\n",
    "            \"Begin by asking your first question.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The summary agent's output is the last message in the chat history\n",
    "    cross_ex_json = chat_result.chat_history[-1][\"content\"]\n",
    "    cross_ex_data = json.loads(cross_ex_json)\n",
    "    cross_ex_pairs = cross_ex_data[\"cross_ex\"]\n",
    "\n",
    "    # Format as HTML for display\n",
    "    html = \"<h2>1NC Cross-Examination of the 2AC</h2>\\n\"\n",
    "    for i, pair in enumerate(cross_ex_pairs, 1):\n",
    "        html += f\"<div><b>Negative Question {i}:</b> {pair['negative_question']}</div>\\n\"\n",
    "        html += f\"<div><b>Affirmative Response {i}:</b> {pair['affirmative_response']}</div>\\n\"\n",
    "        html += \"<br/>\\n\"\n",
    "\n",
    "    return html\n",
    "\n",
    "# Usage (the target must be a valid identifier, so it cannot start with a digit):\n",
    "# two_nc_crossx = simulate_1nc_cross_examination_of_2ac(debate_case, negative_case_html, new_ac_case)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The agent loop fails intermittently, so retry a few times before giving up.\n",
    "max_attempts = 3\n",
    "for attempt in range(1, max_attempts + 1):\n",
    "    try:\n",
    "        two_nc_crossx = simulate_1nc_cross_examination_of_2ac(debate_case, negative_case_html, new_ac_case)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        # Report every failure so a retry is visible instead of silently swallowed.\n",
    "        print(f\"Cross-examination attempt {attempt}/{max_attempts} failed: {e!r}\")\n",
    "        if attempt == max_attempts:\n",
    "            raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(two_nc_crossx))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Guarded so that re-running this cell does not append the cross-examination twice.\n",
    "if not new_ac_case.endswith(two_nc_crossx):\n",
    "    new_ac_case = new_ac_case + two_nc_crossx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_ac_case"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2NC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2NC Gather Cards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2nc_evidence_to_case(debate_case, negative_case_html, twoac_debate_case_html):\n",
    "    \"\"\"\n",
    "    Simulates the negative gathering new evidence for the 2NC.\n",
    "\n",
    "    A strategist agent, a search agent (which calls search_debate_cards through an executor\n",
    "    agent) and an evaluator agent run in a GroupChat. The chat stops once the evaluator has\n",
    "    approved cards in max_iterations separate messages. Only the cards in the final message\n",
    "    that the evaluator marked \"include_it\" are rendered.\n",
    "    This function does NOT write a 2NC speech; it only selects and presents the new cards.\n",
    "\n",
    "    Args:\n",
    "        debate_case: The 1AC, interpolated into the opening prompt.\n",
    "        negative_case_html: The 1NC, interpolated into the opening prompt.\n",
    "        twoac_debate_case_html: The 2AC, interpolated into the opening prompt.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string with a \"2NC\" heading followed by one section per approved card\n",
    "        (possibly none).\n",
    "\n",
    "    Raises:\n",
    "        json.JSONDecodeError: If the last chat message is not valid JSON.\n",
    "        KeyError: If the parsed JSON lacks the expected keys.\n",
    "    \"\"\"\n",
    "\n",
    "    class DebateCard(BaseModel):\n",
    "        id: int\n",
    "        cite: str\n",
    "        include_in_case: Literal[\"include_it\", \"False\"]\n",
    "        reason_to_include: str\n",
    "        retagged_argument_as_read_outloud_in_the_debate_round: str\n",
    "\n",
    "    class DebateCardSearchResult(BaseModel):\n",
    "        cards: List[DebateCard] = Field(..., min_items=1, max_items=6)\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Forces the search agent to always issue a tool call\n",
    "    required_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        tool_choice=\"required\",\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "    # Forces the evaluator to answer in the DebateCardSearchResult schema\n",
    "    debate_eval_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=DebateCardSearchResult,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2NC Agent: Focused on gathering the most strategic, high-impact 2NC cards\n",
    "    two_nc_agent = ConversableAgent(\n",
    "        name=\"two_nc_agent\",\n",
    "        system_message=(\n",
    "            \"You are an expert negative policy debater preparing the 2NC. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), \"\n",
    "            \"and the 2AC_debate_case_html (the 2AC, including all new cards and arguments). \"\n",
    "            \"Your job is to:\\n\"\n",
    "            \"1. Carefully read and analyze all 1AC, 1NC, and 2AC arguments and evidence.\\n\"\n",
    "            \"2. Identify which 2AC arguments and cards are most threatening to the negative's winning strategy—these are the arguments that, if left unanswered or insufficiently answered, would most likely cause the negative to lose the debate round.\\n\"\n",
    "            \"3. For each of these most threatening 2AC arguments, research and select the most strategic, high-quality, unique cards (evidence) that directly answer and refute those arguments, while also further entrenching and extending the negative's core claims. \"\n",
    "            \"Each card must be:\\n\"\n",
    "            \"- Directly responsive to a specific 2AC argument that poses a significant threat to the negative\\n\"\n",
    "            \"- Not duplicative of any previous 1AC, 1NC, or 2AC card (do NOT select any card that is already in the 1AC, 1NC, or 2AC)\\n\"\n",
    "            \"- Clearly marked with its cite and a retagged argument as it would be read outloud in the debate round\\n\"\n",
    "            \"- Accompanied by a reason to include it in the 2NC, specifically explaining how it helps the negative win the round against the most dangerous 2AC arguments\\n\"\n",
    "            \"- Most importantly, every card you select must support the negative and the negative position. Do not select any card that undermines or contradicts the negative case or the 1NC's core claims.\\n\"\n",
    "            \"Do NOT write a 2NC rebuttal speech. Only select and present the new 2NC cards with their tags, cites, and reasons to include.\\n\"\n",
    "            \"Prioritize quality and strategic value over quantity: select only as many cards as are necessary to decisively answer the 2AC's most dangerous arguments and secure a winning position for the negative.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2NC Evidence Evaluator: Ensures only the most strategic, responsive cards are included\n",
    "    two_nc_eval_agent = ConversableAgent(\n",
    "        name=\"two_nc_eval_agent\",\n",
    "        system_message=(\n",
    "            \"You are a highly rigorous debate coach and argument analyst. \"\n",
    "            \"Your job is to strictly evaluate whether each piece of evidence proposed for the 2NC meets the highest standards for inclusion:\\n\"\n",
    "            \"- Is it directly responsive to a 2AC argument that the negative is most vulnerable to?\\n\"\n",
    "            \"- Is it unique (not duplicative of any 1AC, 1NC, or 2AC card—do NOT approve any card that is already in the 1AC, 1NC, or 2AC)?\\n\"\n",
    "            \"- Is it authoritative and empirically supported?\\n\"\n",
    "            \"- Is it strategically valuable for the 2NC, meaning it helps the negative win the round against the most dangerous 2AC arguments?\\n\"\n",
    "            \"- Is it clearly retagged and recut for 2NC use?\\n\"\n",
    "            \"- Most importantly, does it support the negative and the negative position? Reject any card that undermines or contradicts the negative case or the 1NC's core claims.\\n\"\n",
    "            \"Reject any card that does not meet all criteria. Only approve cards that are directly responsive to the 2AC's most threatening arguments, unique, strategically valuable for the 2NC, and affirm the negative. Do NOT approve any card that is already in the 1AC, 1NC, or 2AC.\"\n",
    "        ),\n",
    "        llm_config=debate_eval_llm_config,\n",
    "    )\n",
    "\n",
    "    # Search agent for evidence\n",
    "    debate_search_agent = ConversableAgent(\n",
    "        name=\"debate_search_agent\",\n",
    "        system_message=\"You are a helpful assistant that can search the debate evidence dataset for a given tag. Your query will retrieve a list of debate cards.\",\n",
    "        llm_config=required_llm_config,\n",
    "    )\n",
    "\n",
    "    # Executes the tool calls proposed by the search agent\n",
    "    executor_agent = ConversableAgent(\n",
    "        name=\"executor_agent\",\n",
    "        human_input_mode=\"NEVER\",\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    register_function(\n",
    "        search_debate_cards,\n",
    "        caller=debate_search_agent,\n",
    "        executor=executor_agent,\n",
    "        description=\"Search the debate evidence dataset using natural language queries. Return a list of debate cards.\",\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    iterations = 0  # Evaluator messages so far that approved at least one card\n",
    "    max_iterations = 3  # Allow for more cards if needed, but focus on quality over quantity\n",
    "\n",
    "    def custom_speaker_selection_func(last_speaker: Agent, groupchat: GroupChat):\n",
    "        \"\"\"Route strategist -> search -> executor -> evaluator, looping back to search.\"\"\"\n",
    "        nonlocal iterations\n",
    "        messages = groupchat.messages\n",
    "\n",
    "        # Only the opening context message exists: let the strategist speak first.\n",
    "        if len(messages) <= 1:\n",
    "            return two_nc_agent\n",
    "\n",
    "        if last_speaker is debate_search_agent:\n",
    "            return executor_agent\n",
    "\n",
    "        if last_speaker is executor_agent:\n",
    "            return two_nc_eval_agent\n",
    "\n",
    "        if last_speaker is two_nc_eval_agent:\n",
    "            # \"include_it\" only appears when the evaluator approved a card.\n",
    "            if \"include_it\" in messages[-1][\"content\"]:\n",
    "                iterations += 1\n",
    "                if iterations >= max_iterations:\n",
    "                    # Returning None ends the group chat.\n",
    "                    return None\n",
    "            return debate_search_agent\n",
    "\n",
    "        if last_speaker is two_nc_agent:\n",
    "            return debate_search_agent\n",
    "        else:\n",
    "            return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_nc_agent, debate_search_agent, executor_agent, two_nc_eval_agent],\n",
    "        messages=[],\n",
    "        max_round=60,\n",
    "        speaker_selection_method=custom_speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Use the debate_case (HTML, including the 1AC, which is the Affirmative Constructive and represents the affirmative team's advocacy and evidence), negative_case_html (HTML, the 1NC, which is the Negative Constructive), and twoac_debate_case_html (HTML, the 2AC, which is the Affirmative's Second Constructive) for context\n",
    "    chat_result = debate_search_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"Negative Case (1NC) (Our Team):\\n{negative_case_html}\\n\\n\"\n",
    "            f\"Affirmative Case (1AC):\\n{debate_case}\\n\\n\"\n",
    "            f\"Note: The 1AC (Affirmative Constructive) is the initial speech and evidence presented by the affirmative team, outlining their advocacy and core arguments.\\n\\n\"\n",
    "            f\"2AC (Second Affirmative Constructive):\\n{twoac_debate_case_html}\\n\\n\"\n",
    "            \"Assume that the current year is 2022.\\n\"\n",
    "            \"You are the 2NC. Your job is to:\\n\"\n",
    "            \"- Extend all 1NC cards and arguments\\n\"\n",
    "            \"- Identify which 2AC arguments the negative is most vulnerable to and which are most likely to decide the round if left unanswered\\n\"\n",
    "            \"- For each of these most threatening 2AC arguments, research and present the most strategic, high-quality, unique cards that directly answer and refute them, while also further entrenching and extending the negative's core claims\\n\"\n",
    "            \"- For each card, provide its cite, a reason to include (explaining how it helps the 2NC win the round against the most dangerous 2AC arguments), and a retagged argument as it would be read outloud in the debate round\\n\"\n",
    "            \"- Most importantly, every card you select must support the negative and the negative position. Do not select any card that undermines or contradicts the negative case or the 1NC's core claims.\\n\"\n",
    "            \"- Do NOT select any card that is already in the 1AC, 1NC, or 2AC. All 2NC cards must be new and not previously used in the 1AC, 1NC, or 2AC.\\n\"\n",
    "            \"Do NOT write a 2NC rebuttal speech. Only select and present the new 2NC cards with their tags, cites, and reasons to include.\\n\"\n",
    "            \"Prioritize quality and strategic value over quantity: select only as many cards as are necessary to decisively answer the 2AC's most dangerous arguments and secure a winning position for the negative.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    two_nc_result_raw = chat_result.chat_history[-1][\"content\"]\n",
    "    two_nc_result_json = json.loads(two_nc_result_raw)\n",
    "    # Keep only the cards the evaluator approved; entries marked \"False\" were rejected\n",
    "    # and must not be read in the 2NC.\n",
    "    approved_cards = [\n",
    "        card for card in two_nc_result_json[\"cards\"]\n",
    "        if card.get(\"include_in_case\") == \"include_it\"\n",
    "    ]\n",
    "\n",
    "    # Build the 2NC HTML string\n",
    "    two_nc_html = \"<div class='two-nc-section'>\\n\"\n",
    "    two_nc_html += \"<h1>2NC</h1>\\n\"\n",
    "\n",
    "    # Add each approved 2NC card to the 2NC HTML\n",
    "    rendered = 0\n",
    "    for card_json in approved_cards:\n",
    "        card_doc = get_document_by_id(card_json[\"id\"])\n",
    "        if card_doc is None:\n",
    "            # NOTE(review): assumes get_document_by_id returns None for an unknown id\n",
    "            # (e.g. one the model invented) - confirm. Skipping keeps the other cards\n",
    "            # instead of failing the whole run.\n",
    "            continue\n",
    "        rendered += 1\n",
    "        retagged_argument = card_json.get(\"retagged_argument_as_read_outloud_in_the_debate_round\", \"\")\n",
    "        card_markup = str(card_doc['markup'])\n",
    "        two_nc_html += (\n",
    "            f\"\\n<h2>2NC Card {rendered}</h2>\"\n",
    "            f\"\\n<div><p><strong></strong> {retagged_argument}</p></div>\"\n",
    "            f\"\\n<div><p>{card_markup}</p></div>\"\n",
    "        )\n",
    "\n",
    "    two_nc_html += \"\\n</div>\"\n",
    "\n",
    "    return two_nc_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The agent loop fails intermittently, so retry a few times before giving up.\n",
    "max_attempts = 3\n",
    "for attempt in range(max_attempts):\n",
    "    try:\n",
    "        two_nc_html = add_2nc_evidence_to_case(debate_case, negative_case_html, new_ac_case)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        # Report every failure so a retry is visible instead of silently swallowed.\n",
    "        print(f\"2NC evidence attempt {attempt + 1}/{max_attempts} failed: {e!r}\")\n",
    "        if attempt == max_attempts - 1:\n",
    "            raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(two_nc_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_nc_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2NC: Write Speech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2nc_to_case(debate_case, negative_case_html, twoac_case_html, two_nc_html, review_rounds=0):\n",
    "    \"\"\"\n",
    "    Generates a 2NC debate speech transcript and appends it to the 2NC cards HTML.\n",
    "\n",
    "    A drafter agent writes the speech. With review_rounds > 0, a coach agent critiques each\n",
    "    draft and the drafter revises it, that many times. With the default of 0 the first\n",
    "    draft is used as-is and the coach never speaks.\n",
    "\n",
    "    Args:\n",
    "        debate_case: HTML including the 1AC; placed at the top of the prompt.\n",
    "        negative_case_html: HTML of the 1NC.\n",
    "        twoac_case_html: HTML of the 2AC section.\n",
    "        two_nc_html: HTML of the 2NC cards; the speech is appended to a copy of this string.\n",
    "        review_rounds: Number of coach-review/revision cycles to run after the first draft.\n",
    "            Negative values are treated as 0.\n",
    "\n",
    "    Returns:\n",
    "        two_nc_html followed by a \"2NC Speech\" section holding the last chat message.\n",
    "    \"\"\"\n",
    "    review_rounds = max(0, int(review_rounds))\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2NC Drafter: Writes the initial and revised 2NC speeches\n",
    "    two_nc_drafter = ConversableAgent(\n",
    "        name=\"two_nc_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert negative policy debater preparing the 2NC speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), the 2AC (including all arguments and evidence), and the 2NC cards. \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 2NC speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Extend all 1NC arguments and evidence\\n\"\n",
    "            \"- Directly answer and refute all of the 2AC arguments (off-case and on-case)\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the negative wins the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 2NC speech\\n\"\n",
    "            \"- Be extremely long, highly detailed, and complete—covering all major 2AC arguments and providing clear, specific, line-by-line answers\\n\"\n",
    "            \"- Be written as a transcript, as if the 2NC is being read aloud in a debate round\\n\"\n",
    "            \"- When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 2AC.\\n\"\n",
    "            \"Do NOT simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\\n\"\n",
    "            \"IMPORTANT: Output the 2NC speech transcript in HTML format, using <div>, <h2>, <h3>, <p>, <ul>, <ol>, <b>, <i>, and similar tags. Do NOT use <pre> or markdown formatting. The output should be visually similar to the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2NC Coach: Reviews and suggests improvements for the 2NC speech.\n",
    "    # It also sends the opening context message; it only reviews when review_rounds > 0.\n",
    "    two_nc_coach = ConversableAgent(\n",
    "        name=\"two_nc_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 2NC speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 2NC answer all the most important 2AC arguments?\\n\"\n",
    "            \"- Strategic focus: Did the 2NC collapse to the best ground and avoid spreading too thin?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 2NC?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    drafts_written = 0  # Drafts produced so far by the drafter\n",
    "\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        \"\"\"Drafter writes; the coach reviews only while review rounds remain.\"\"\"\n",
    "        nonlocal drafts_written\n",
    "        if last_speaker is two_nc_drafter:\n",
    "            drafts_written += 1\n",
    "            # Stop on the draft that follows the last requested review, so the final\n",
    "            # message in the chat is always a speech rather than coach feedback.\n",
    "            if drafts_written > review_rounds:\n",
    "                return None\n",
    "            return two_nc_coach\n",
    "        # The coach sends the opening context message and, later, its feedback; in both\n",
    "        # cases the drafter speaks next.\n",
    "        if last_speaker is two_nc_coach:\n",
    "            return two_nc_drafter\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_nc_drafter, two_nc_coach],\n",
    "        messages=[],\n",
    "        # Opening message + first draft, plus two messages per review cycle.\n",
    "        max_round=max(4, 2 + 2 * review_rounds),\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 2NC drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Cards:\\n{two_nc_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 2NC speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"Cover all major 2AC arguments, extend the 1NC, and collapse strategically. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 2AC.\\n\"\n",
    "        \"IMPORTANT: Output the 2NC speech transcript in HTML format, using <div>, <h2>, <h3>, <p>, <ul>, <ol>, <b>, <i>, and similar tags. Do NOT use <pre> or markdown formatting. The output should be visually similar to the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = two_nc_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # The chat always ends on a drafter message, so the last entry is the final speech.\n",
    "    transcript = chat_result.chat_history[-1][\"content\"]\n",
    "    # Append the 2NC transcript to the two_nc_html\n",
    "    two_nc_html += \"\\n<div class='two-nc-speech-section'>\\n\"\n",
    "    two_nc_html += \"<h1>2NC Speech</h1>\\n\"\n",
    "    two_nc_html += f\"<div class='two-nc-transcript'>{transcript}</div>\\n\"\n",
    "    two_nc_html += \"</div>\\n\"\n",
    "\n",
    "    return two_nc_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_nc_html_full = add_2nc_to_case(debate_case, negative_case_html, new_ac_case, two_nc_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(two_nc_html_full))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_nc_html_full"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cross Examination (of the 2NC)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate_2ac_cross_examination_of_2nc(\n",
    "    debate_case: str,\n",
    "    negative_case_html: str,\n",
    "    twoac_case_html: str,\n",
    "    twonc_case_html: str\n",
    ") -> str:\n",
    "    \"\"\"\n",
    "    Simulates the cross-examination of the 2NC by the 2AC.\n",
    "    Takes the 1AC (debate_case), the 1NC (negative_case_html), the 2AC (twoac_case_html), and the 2NC (twonc_case_html) as input,\n",
    "    and returns a formatted HTML string of the cross-examination.\n",
    "    The affirmative questions must always be designed to support the affirmative case and plan, and the negative responses must always reject the plan and support the negative counterplan, counteradvocacy, and negative positions.\n",
    "    \"\"\"\n",
    "    from typing import List\n",
    "    from pydantic import BaseModel, Field\n",
    "\n",
    "    # Define the structure for a cross-examination question and answer\n",
    "    class CrossExPair(BaseModel):\n",
    "        affirmative_question: str\n",
    "        negative_response: str\n",
    "\n",
    "    class CrossExamination(BaseModel):\n",
    "        cross_ex: List[CrossExPair] = Field(..., min_items=3, max_items=3)\n",
    "\n",
    "    # LLM config for all agents\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    cross_ex_llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        response_format=CrossExamination,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Agent 1: Affirmative asks questions\n",
    "    affirmative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"affirmative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 2AC (affirmative) debater in a policy debate cross-examination. \"\n",
    "            \"Your job is to ask sharp, strategic, and challenging questions about the 2NC (negative's second constructive) just presented. \"\n",
    "            \"Every question you ask must be designed to directly support the affirmative's case and plan, and to undermine the negative's arguments, counterplan, counteradvocacy, and all negative positions. \"\n",
    "            \"Focus on exposing weaknesses, ambiguities, or assumptions in the negative's extensions, new arguments, and overall strategy, \"\n",
    "            \"especially in ways that help the affirmative win the round and defend the plan. \"\n",
    "            \"Ask one question at a time, and wait for the negative to answer before asking the next. \"\n",
    "            \"Do not answer your own questions. \"\n",
    "            \"Be concise and direct. \"\n",
    "            \"Do not repeat questions. \"\n",
    "            \"You will ask a total of 3 to 7 questions.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 2: Negative answers\n",
    "    negative_cross_ex_agent = ConversableAgent(\n",
    "        name=\"negative_cross_ex_agent\",\n",
    "        system_message=(\n",
    "            \"You are the 2NC (negative) debater being cross-examined by the 2AC (affirmative) in a policy debate. \"\n",
    "            \"Your job is to answer each question as clearly, persuasively, and strategically as possible, always rejecting the plan and supporting the negative's counterplan, counteradvocacy, and all negative positions. \"\n",
    "            \"Every answer you give must be designed to help the negative win the round, reinforce the negative's arguments, and defend the negative's counterplan or advocacy against the plan. \"\n",
    "            \"Respond directly to the affirmative's question, but do not volunteer extra information. \"\n",
    "            \"Be concise and avoid rambling. \"\n",
    "            \"Do not ask questions yourself.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Agent 3: Cross-ex summary agent (outputs the structured Q&A)\n",
    "    cross_ex_summary_agent = ConversableAgent(\n",
    "        name=\"cross_ex_summary_agent\",\n",
    "        system_message=(\n",
    "            \"You are a debate judge summarizing the 2AC's cross-examination of the 2NC. \"\n",
    "            \"Your job is to produce a structured list of question/answer pairs, each with an 'affirmative_question' and a 'negative_response', \"\n",
    "            \"covering the full cross-examination as it occurred. \"\n",
    "            \"Each affirmative question must be designed to support the affirmative's case and plan, and each negative response must be designed to reject the plan and support the negative's counterplan, counteradvocacy, and negative positions. \"\n",
    "            \"Return the result as a list of 3 to 7 question/answer pairs, each clearly labeled.\"\n",
    "        ),\n",
    "        llm_config=cross_ex_llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    cross_ex_iterations = 0  # Track the number of Q&A iterations\n",
    "\n",
    "    def cross_ex_speaker_selection(last_speaker, groupchat):\n",
    "        nonlocal cross_ex_iterations\n",
    "        # Alternate between affirmative and negative, then finish with summary agent\n",
    "        if last_speaker is affirmative_cross_ex_agent:\n",
    "            return negative_cross_ex_agent\n",
    "        if last_speaker is negative_cross_ex_agent:\n",
    "            cross_ex_iterations += 1\n",
    "            if cross_ex_iterations >= 3:\n",
    "                return cross_ex_summary_agent\n",
    "            else:\n",
    "                return affirmative_cross_ex_agent\n",
    "        if last_speaker is cross_ex_summary_agent:\n",
    "            return None\n",
    "        return \"round_robin\"\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[affirmative_cross_ex_agent, negative_cross_ex_agent, cross_ex_summary_agent],\n",
    "        messages=[],\n",
    "        max_round=40,\n",
    "        speaker_selection_method=cross_ex_speaker_selection\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # The context for the cross-examination is the 1AC, 1NC, 2AC, and 2NC\n",
    "    chat_result = negative_cross_ex_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=(\n",
    "            f\"You are about to begin the 2AC's cross-examination of the 2NC. \"\n",
    "            f\"The 1AC is as follows:\\n\\n{debate_case}\\n\\n\"\n",
    "            f\"The 1NC is as follows:\\n\\n{negative_case_html}\\n\\n\"\n",
    "            f\"The 2AC is as follows:\\n\\n{twoac_case_html}\\n\\n\"\n",
    "            f\"The 2NC is as follows:\\n\\n{twonc_case_html}\\n\\n\"\n",
    "            \"Begin by asking your first question.\"\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # The summary agent's output is the last message in the chat history\n",
    "    cross_ex_json = chat_result.chat_history[-1][\"content\"]\n",
    "    cross_ex_data = json.loads(cross_ex_json)\n",
    "    cross_ex_pairs = cross_ex_data[\"cross_ex\"]\n",
    "\n",
    "    # Format as HTML for display\n",
    "    html = \"<h2>2AC Cross-Examination of the 2NC</h2>\\n\"\n",
    "    for i, pair in enumerate(cross_ex_pairs, 1):\n",
    "        html += f\"<div><b>Affirmative Question {i}:</b> {pair['affirmative_question']}</div>\\n\"\n",
    "        html += f\"<div><b>Negative Response {i}:</b> {pair['negative_response']}</div>\\n\"\n",
    "        html += \"<br/>\\n\"\n",
    "\n",
    "    return html\n",
    "\n",
    "# For compatibility with the rest of the code, assign to twonc_crossex_html\n",
    "# Usage: twonc_crossex_html = simulate_2ac_cross_examination_of_2nc(debate_case, negative_case_html, twoac_case_html, twonc_case_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "attempts = 0\n",
    "while True:\n",
    "    try:\n",
    "        two_ac_crossex = simulate_2ac_cross_examination_of_2nc(debate_case, negative_case_html, new_ac_case, two_nc_html_full)\n",
    "        break\n",
    "    except Exception as e:\n",
    "        attempts += 1\n",
    "        if attempts >= 3:\n",
    "            raise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_nc_html_full = two_nc_html_full + two_ac_crossex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "two_nc_html_full"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1NR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_1nr_to_case(debate_case, negative_case_html, twoac_case_html, twonc_case_html):\n",
    "    \"\"\"\n",
    "    Generates a complete, high-quality 1NR debate speech transcript.\n",
    "    The function takes as input:\n",
    "        - debate_case (HTML, including 1AC)\n",
    "        - negative_case_html (HTML, the 1NC)\n",
    "        - twoac_case_html (HTML for the 2AC section)\n",
    "        - twonc_case_html (HTML for the 2NC section)\n",
    "    It returns a full, iteratively drafted 1NR transcript as onenr_case_html.\n",
    "    The 1NR should be shorter than the 2NC and focus on making arguments that are distinct from the 2NC.\n",
    "    The output will be in HTML format, similar to the input card formats, and should not use <pre> or markdown tags.\n",
    "    \"\"\"\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 1NR Drafter: Writes the initial and revised 1NR speeches\n",
    "    one_nr_drafter = ConversableAgent(\n",
    "        name=\"one_nr_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert negative policy debater preparing the 1NR speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), the 2AC, and the 2NC. \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 1NR speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Be noticeably shorter than the 2NC, simulating the less time allocated to the 1NR in a real debate round.\\n\"\n",
    "            \"- Focus on making arguments that are distinct from those made in the 2NC, rather than repeating or rephrasing them. If possible, cover different off-case or on-case arguments, or provide new extensions, analytics, or strategic concessions.\\n\"\n",
    "            \"- Extend the best negative arguments and evidence from the 1NC and 2NC, but avoid duplicating the 2NC's content.\\n\"\n",
    "            \"- Directly answer and refute all of the 2AC arguments (off-case and on-case) that were not fully addressed by the 2NC, or that require additional negative development.\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the negative is still winning the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 1NR speech\\n\"\n",
    "            \"- Be detailed and complete—covering all major 2AC arguments relevant to the 1NR, and providing clear, specific, line-by-line answers, but do not attempt to be as long or comprehensive as the 2NC.\\n\"\n",
    "            \"- Be written as a transcript, as if the 1NR is being read aloud in a debate round\\n\"\n",
    "            \"- When answering affirmative permutations or new arguments, you may and should strategically concede any arguments from the 1NC or 2NC that you feel are weak, so that you can spend more time on the arguments that the negative is winning on. Do not simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\\n\"\n",
    "            \"IMPORTANT: Output the 1NR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 1NR Coach: Reviews and suggests improvements for the 1NR speech\n",
    "    one_nr_coach = ConversableAgent(\n",
    "        name=\"one_nr_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 1NR speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 1NR answer all the most important 2AC arguments, especially those not fully addressed by the 2NC?\\n\"\n",
    "            \"- Strategic focus: Did the 1NR strategically concede any weak arguments from the 1NC or 2NC in order to spend more time on the arguments the negative is winning on, and avoid spreading too thin?\\n\"\n",
    "            \"- Distinctiveness: Did the 1NR make arguments that are distinct from the 2NC, rather than repeating or rephrasing them? Did it cover different off-case or on-case arguments, or provide new extensions or analytics?\\n\"\n",
    "            \"- Length: Is the 1NR noticeably shorter than the 2NC, reflecting the time constraints of the speech?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 1NR?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: alternate between drafter and coach for 2-3 rounds\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # First message: drafter writes initial speech\n",
    "        if len(groupchat.messages) == 0:\n",
    "            return one_nr_drafter\n",
    "        # Drafter just wrote: coach reviews\n",
    "        if last_speaker is one_nr_drafter:\n",
    "            return None\n",
    "        if last_speaker is one_nr_coach:\n",
    "            return one_nr_drafter\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[one_nr_drafter, one_nr_coach],\n",
    "        messages=[],\n",
    "        max_round=4,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 1NR drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Speech and Cards:\\n{twonc_case_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 1NR speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"The 1NR should be noticeably shorter than the 2NC, simulating the less time allocated to the 1NR. \"\n",
    "        \"Focus on making arguments that are distinct from those made in the 2NC, rather than repeating or rephrasing them. If possible, cover different off-case or on-case arguments, or provide new extensions, analytics, or strategic concessions. \"\n",
    "        \"Cover all major 2AC arguments relevant to the 1NR, extend the negative, and strategically concede any arguments from the 1NC or 2NC that you feel are weak, so that you can spend more time on the arguments that the negative is winning on. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering affirmative permutations or new arguments, focus your time on the arguments the negative is winning and explain why the negative wins. \"\n",
    "        \"IMPORTANT: Output the 1NR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = one_nr_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # Find the last 1NR speech draft in the chat history\n",
    "    # (The drafter's last message)\n",
    "    transcript = chat_result.chat_history[-1][\"content\"]\n",
    "    # Append the 1NR transcript to the onenr_case_html\n",
    "    onenr_case_html = \"\\n<div class='one-nr-section'>\\n\"\n",
    "    onenr_case_html += \"<h1>1NR Speech</h1>\\n\"\n",
    "    onenr_case_html += f\"<div class='onenr-transcript'>{transcript}</div>\\n\"\n",
    "    onenr_case_html += \"</div>\\n\"\n",
    "\n",
    "    return onenr_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "onenr_html = add_1nr_to_case(debate_case, negative_case_html, new_ac_case, two_nc_html_full)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(onenr_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "onenr_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1AR "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_1ar_to_case(debate_case, negative_case_html, twoac_case_html, twonc_case_html, onenr_html):\n",
    "    \"\"\"\n",
    "    Generates a complete, high-quality 1AR debate speech transcript.\n",
    "    The function takes as input the debate_case (HTML, including 1AC), negative_case_html (HTML, the 1NC), twoac_case_html (HTML for the 2AC section), twonc_case_html (HTML for the 2NC section), and onenr_html (HTML for the 1NR section).\n",
    "    It appends a full, iteratively drafted 1AR transcript to 1AR_case_html.\n",
    "    \"\"\"\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 1AR Drafter: Writes the initial and revised 1AR speeches\n",
    "    one_ar_drafter = ConversableAgent(\n",
    "        name=\"one_ar_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert affirmative policy debater preparing the 1AR speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), the 2AC, the 2NC, and the 1NR. \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 1AR speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Extend all 1AC and 2AC arguments and evidence\\n\"\n",
    "            \"- Directly answer and refute all of the 2NC and 1NR arguments (off-case and on-case)\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the affirmative is still winning the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 1AR speech\\n\"\n",
    "            \"- Be extremely long, highly detailed, and complete—covering all major 2NC and 1NR arguments and providing clear, specific, line-by-line answers\\n\"\n",
    "            \"- Be written as a transcript, as if the 1AR is being read aloud in a debate round\\n\"\n",
    "            \"- When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 2NC or 1NR.\\n\"\n",
    "            \"Do NOT simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\\n\"\n",
    "            \"IMPORTANT: Output the 1AR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 1AR Coach: Reviews and suggests improvements for the 1AR speech\n",
    "    one_ar_coach = ConversableAgent(\n",
    "        name=\"one_ar_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 1AR speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 1AR answer all the most important 2NC and 1NR arguments?\\n\"\n",
    "            \"- Strategic focus: Did the 1AR collapse to the best ground and avoid spreading too thin?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 1AR?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: alternate between drafter and coach for 2-3 rounds\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # First message: drafter writes initial speech\n",
    "        if len(groupchat.messages) == 0:\n",
    "            return one_ar_drafter\n",
    "        # Drafter just wrote: coach reviews\n",
    "        if last_speaker is one_ar_drafter:\n",
    "            return None\n",
    "        if last_speaker is one_ar_coach:\n",
    "            return one_ar_drafter\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[one_ar_drafter, one_ar_coach],\n",
    "        messages=[],\n",
    "        max_round=4,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 1AR drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Speech and Cards:\\n{twonc_case_html}\\n\\n\"\n",
    "        f\"1NR Speech and Cards:\\n{onenr_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 1AR speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"Cover all major 2NC and 1NR arguments, extend the 2AC, and crystalize why you are winning strategically. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering counterplans and kritiks, you may and should include debate permutations (such as 'perm do both', 'perm do the plan', etc.) if and only if they are strategic in the context of the round. Do not use permutations automatically—only include them if they are likely to be effective and relevant against the specific counterplan or kritik presented in the 2NC or 1NR.\\n\"\n",
    "        \"IMPORTANT: Output the 1AR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = one_ar_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # Find the last 1AR speech draft in the chat history\n",
    "    # (The drafter's last message)\n",
    "    transcript = chat_result.chat_history[-1][\"content\"]\n",
    "    # Append the 1AR transcript to the 1AR_case_html\n",
    "    onear_case_html = \"\\n<div class='one-ar-section'>\\n\"\n",
    "    onear_case_html += \"<h1>1AR Speech</h1>\\n\"\n",
    "    onear_case_html += f\"<div class='onear-transcript'>{transcript}</div>\\n\"\n",
    "    onear_case_html += \"</div>\\n\"\n",
    "\n",
    "    return onear_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_ar_html = add_1ar_to_case(debate_case, negative_case_html, new_ac_case, two_nc_html_full, onenr_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(HTML(one_ar_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_ar_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2NR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2nr_to_case(debate_case, negative_case_html, twoac_case_html, twonc_case_html, onenr_html, onear_case_html):\n",
    "    \"\"\"\n",
    "    Generates a complete, high-quality 2NR debate speech transcript.\n",
    "    The function takes as input:\n",
    "        - debate_case (HTML, including 1AC)\n",
    "        - negative_case_html (HTML, the 1NC)\n",
    "        - twoac_case_html (HTML for the 2AC section)\n",
    "        - twonc_case_html (HTML for the 2NC section)\n",
    "        - onenr_html (HTML for the 1NR section)\n",
    "        - onear_case_html (HTML for the 1AR section)\n",
    "    It appends a full, iteratively drafted 2NR transcript to 2nr_case_html.\n",
    "    \"\"\"\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2NR Drafter: Writes the initial and revised 2NR speeches\n",
    "    two_nr_drafter = ConversableAgent(\n",
    "        name=\"two_nr_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert negative policy debater preparing the 2NR speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), the 2AC, the 2NC, the 1NR, and the 1AR. \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 2NR speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Extend the best negative arguments and evidence from the 1NC and 2NC\\n\"\n",
    "            \"- Directly answer and refute all of the 2AC and 1AR arguments (off-case and on-case)\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the negative is still winning the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 2NR speech\\n\"\n",
    "            \"- Be extremely long, highly detailed, and complete—covering all major 1AR arguments and providing clear, specific, line-by-line answers\\n\"\n",
    "            \"- Be written as a transcript, as if the 2NR is being read aloud in a debate round\\n\"\n",
    "            \"- When answering affirmative permutations or new arguments, you may and should strategically concede any arguments from the 1NC or 2NC that you feel are weak, so that you can spend more time on the arguments that the negative is winning on. Do not simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\"\n",
    "            \"\\nIMPORTANT: Output the 2NR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2NR Coach: Reviews and suggests improvements for the 2NR speech\n",
    "    two_nr_coach = ConversableAgent(\n",
    "        name=\"two_nr_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 2NR speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 2NR answer all the most important 1AR arguments?\\n\"\n",
    "            \"- Strategic focus: Did the 2NR strategically concede any weak arguments from the 1NC or 2NC in order to spend more time on the arguments the negative is winning on, and avoid spreading too thin?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 2NR?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: alternate between drafter and coach for 2-3 rounds\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # First message: drafter writes initial speech\n",
    "        if len(groupchat.messages) == 0:\n",
    "            return two_nr_drafter\n",
    "        # Drafter just wrote: coach reviews\n",
    "        if last_speaker is two_nr_drafter:\n",
    "            return None\n",
    "        if last_speaker is two_nr_coach:\n",
    "            return two_nr_drafter\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_nr_drafter, two_nr_coach],\n",
    "        messages=[],\n",
    "        max_round=4,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 2NR drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Speech and Cards:\\n{twonc_case_html}\\n\\n\"\n",
    "        f\"1NR Speech and Cards:\\n{onenr_html}\\n\\n\"\n",
    "        f\"1AR Speech and Cards:\\n{onear_case_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 2NR speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"Cover all major 1AR arguments, extend the negative, and strategically concede any arguments from the 1NC or 2NC that you feel are weak, so that you can spend more time on the arguments that the negative is winning on. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering affirmative permutations or new arguments, focus your time on the arguments the negative is winning and explain why the negative wins.\"\n",
    "        \"\\nIMPORTANT: Output the 2NR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = two_nr_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # Find the last 2NR speech draft in the chat history\n",
    "    # (The drafter's last message)\n",
    "    transcript = chat_result.chat_history[-1][\"content\"]\n",
    "    # Append the 2NR transcript to the 2nr_case_html\n",
    "    two_nr_case_html = \"\\n<div class='two-nr-section'>\\n\"\n",
    "    two_nr_case_html += \"<h1>2NR Speech</h1>\\n\"\n",
    "    two_nr_case_html += f\"<div class='twonr-transcript'>{transcript}</div>\\n\"\n",
    "    two_nr_case_html += \"</div>\\n\"\n",
    "\n",
    "    return two_nr_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the 2NR section HTML by handing the case and the earlier speeches to add_2nr_to_case.\n",
    "two_nr_html = add_2nr_to_case(debate_case, negative_case_html, new_ac_case, two_nc_html_full, onenr_html, one_ar_html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the 2NR section as formatted HTML in the cell output.\n",
    "display(HTML(two_nr_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the raw 2NR HTML string (the markup itself rather than the rendered page).\n",
    "two_nr_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2AR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_2ar_to_case(\n",
    "    debate_case,\n",
    "    negative_case_html,\n",
    "    twoac_case_html,\n",
    "    twonc_case_html,\n",
    "    onenr_html,\n",
    "    onear_case_html,\n",
    "    two_nr_case_html,\n",
    "    revision_rounds=0\n",
    "):\n",
    "    \"\"\"\n",
    "    Generates the 2AR debate speech transcript and wraps it in an HTML section.\n",
    "\n",
    "    Every earlier speech is concatenated into one prompt and sent to a two-agent\n",
    "    group chat. The coach agent opens the chat with that prompt and the drafter\n",
    "    agent answers with the speech. When revision_rounds is greater than zero the\n",
    "    coach also critiques each draft and the drafter rewrites it.\n",
    "\n",
    "    Args:\n",
    "        debate_case: HTML of the debate case, including the 1AC.\n",
    "        negative_case_html: HTML of the 1NC.\n",
    "        twoac_case_html: HTML of the 2AC section.\n",
    "        twonc_case_html: HTML of the 2NC section.\n",
    "        onenr_html: HTML of the 1NR section.\n",
    "        onear_case_html: HTML of the 1AR section.\n",
    "        two_nr_case_html: HTML of the 2NR section.\n",
    "        revision_rounds: Number of coach-review / redraft cycles to run after the\n",
    "            first draft. The default of 0 keeps the original single-draft behaviour;\n",
    "            every extra round adds two more LLM calls. Negative values count as 0.\n",
    "\n",
    "    Returns:\n",
    "        An HTML string: a <div class='two-ar-section'> holding an <h1> heading and\n",
    "        the final speech inside <div class='twoar-transcript'>.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the chat ends without any speech text to return.\n",
    "    \"\"\"\n",
    "\n",
    "    revision_rounds = max(0, int(revision_rounds))\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        temperature=1.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # 2AR Drafter: writes the first draft and any revised drafts of the speech\n",
    "    two_ar_drafter = ConversableAgent(\n",
    "        name=\"two_ar_drafter\",\n",
    "        system_message=(\n",
    "            \"You are an expert affirmative policy debater preparing the 2AR speech. \"\n",
    "            \"You have access to the full debate_case (including the 1AC), the negative_case_html (the 1NC, including all off-case positions: theory, disadvantages, counterplans, kritiks, etc), the 2AC, the 2NC, the 1NR, the 1AR, and the 2NR. \"\n",
    "            \"Your job is to write a complete, high-quality, persuasive, and well-organized 2AR speech transcript. \"\n",
    "            \"The speech should:\\n\"\n",
    "            \"- Extend the best affirmative arguments and evidence from the 1AC, 2AC, and 1AR\\n\"\n",
    "            \"- Directly answer and refute all of the 1NC, 2NC, 1NR, and 2NR arguments (off-case and on-case)\\n\"\n",
    "            \"- Clearly signpost and flow arguments (e.g., 'On the DA...', 'On the Kritik...', 'On Topicality...', etc.)\\n\"\n",
    "            \"- Explain why the affirmative is still winning the round\\n\"\n",
    "            \"- Use debate jargon and structure as in a real 2AR speech\\n\"\n",
    "            \"- Be extremely long, highly detailed, and complete—covering all major 2NR arguments and providing clear, specific, line-by-line answers\\n\"\n",
    "            \"- Be written as a transcript, as if the 2AR is being read aloud in a debate round\\n\"\n",
    "            \"- Do not simply list evidence or cards—write the full speech, integrating evidence and arguments as a debater would.\"\n",
    "            \"\\nIMPORTANT: Output the 2AR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # 2AR Coach: opens the chat with the round context; it only reviews drafts\n",
    "    # (using the prompt below) when revision_rounds > 0\n",
    "    two_ar_coach = ConversableAgent(\n",
    "        name=\"two_ar_coach\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced debate coach and judge. \"\n",
    "            \"Your job is to review the 2AR speech draft and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Argument coverage: Did the 2AR answer all the most important 2NR and 1NR arguments?\\n\"\n",
    "            \"- Strategic focus: Did the 2AR focus on the arguments the affirmative is winning and answer the negative's best arguments?\\n\"\n",
    "            \"- Clarity and organization: Is the speech easy to flow and follow?\\n\"\n",
    "            \"- Persuasiveness and use of evidence: Are arguments well-supported and explained?\\n\"\n",
    "            \"- Realism: Does the speech sound like a real, high-level 2AR?\\n\"\n",
    "            \"Suggest specific improvements, then ask the debater to revise the speech accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: the drafter always answers the coach; the coach answers\n",
    "    # the drafter only while revision rounds remain\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # Opening context (sent by the coach) or a coach review: drafter writes next\n",
    "        if len(groupchat.messages) == 0 or last_speaker is two_ar_coach:\n",
    "            return two_ar_drafter\n",
    "        if last_speaker is two_ar_drafter:\n",
    "            # Coach and drafter strictly alternate, so half the message count is\n",
    "            # the number of drafts written so far\n",
    "            drafts_written = len(groupchat.messages) // 2\n",
    "            if drafts_written <= revision_rounds:\n",
    "                return two_ar_coach\n",
    "        # Returning None tells the group chat to stop\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[two_ar_drafter, two_ar_coach],\n",
    "        messages=[],\n",
    "        # Upper bound only: context + first draft + two messages per revision\n",
    "        # round, with headroom so the selector is what ends the chat\n",
    "        max_round=2 * revision_rounds + 4,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the 2AR drafter\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Speech and Cards:\\n{twonc_case_html}\\n\\n\"\n",
    "        f\"1NR Speech and Cards:\\n{onenr_html}\\n\\n\"\n",
    "        f\"1AR Speech and Cards:\\n{onear_case_html}\\n\\n\"\n",
    "        f\"2NR Speech and Cards:\\n{two_nr_case_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"Write a complete, high-quality, realistic 2AR speech transcript as if you are reading it aloud in a debate round. \"\n",
    "        \"Cover all major 2NR and 1NR arguments, extend the affirmative, and answer all negative arguments. \"\n",
    "        \"Use debate structure and jargon. Do not simply list evidence—write the full speech. \"\n",
    "        \"When answering negative arguments or new arguments, focus your time on the arguments the affirmative is winning and explain why the affirmative wins.\"\n",
    "        \"\\nIMPORTANT: Output the 2AR speech transcript in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = two_ar_coach.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # Take the drafter's last non-empty message. Blindly using the final history\n",
    "    # entry would return the prompt itself as the speech if the drafter never replied.\n",
    "    history = chat_result.chat_history\n",
    "    drafts = [\n",
    "        entry[\"content\"]\n",
    "        for entry in history\n",
    "        if entry.get(\"name\") == two_ar_drafter.name and entry.get(\"content\")\n",
    "    ]\n",
    "    if drafts:\n",
    "        transcript = drafts[-1]\n",
    "    elif len(history) > 1 and history[-1].get(\"content\"):\n",
    "        # History entries carry no usable speaker name: fall back to the last reply\n",
    "        transcript = history[-1][\"content\"]\n",
    "    else:\n",
    "        raise RuntimeError(\"2AR drafter produced no speech; rerun this cell to try again.\")\n",
    "\n",
    "    # Wrap the transcript in the 2AR section markup\n",
    "    two_ar_case_html = \"\\n<div class='two-ar-section'>\\n\"\n",
    "    two_ar_case_html += \"<h1>2AR Speech</h1>\\n\"\n",
    "    two_ar_case_html += f\"<div class='twoar-transcript'>{transcript}</div>\\n\"\n",
    "    two_ar_case_html += \"</div>\\n\"\n",
    "\n",
    "    return two_ar_case_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draft the 2AR; keyword arguments spell out which speech feeds which parameter.\n",
    "two_ar_html = add_2ar_to_case(\n",
    "    debate_case=debate_case,\n",
    "    negative_case_html=negative_case_html,\n",
    "    twoac_case_html=new_ac_case,\n",
    "    twonc_case_html=two_nc_html_full,\n",
    "    onenr_html=onenr_html,\n",
    "    onear_case_html=one_ar_html,\n",
    "    two_nr_case_html=two_nr_html,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the 2AR section as formatted HTML in the cell output.\n",
    "display(HTML(two_ar_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the raw 2AR HTML string (the markup itself rather than the rendered page).\n",
    "two_ar_html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Judge Vote!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def judge_decision_on_round(\n",
    "    debate_case,\n",
    "    negative_case_html,\n",
    "    twoac_case_html,\n",
    "    twonc_case_html,\n",
    "    onear_case_html,\n",
    "    onenr_html,\n",
    "    two_nr_case_html,\n",
    "    two_ar_case_html\n",
    "):\n",
    "    \"\"\"\n",
    "    Simulates a debate judge voting on the round and writing a detailed RFD (Reason for Decision).\n",
    "\n",
    "    All eight speeches are concatenated into one prompt. The judge-coach agent only\n",
    "    opens the chat with that prompt; the judge agent replies once with the decision\n",
    "    and the chat then stops, so the RFD is not sent back for review.\n",
    "\n",
    "    Args:\n",
    "        debate_case: HTML of the debate case, including the 1AC.\n",
    "        negative_case_html: HTML of the 1NC.\n",
    "        twoac_case_html: HTML of the 2AC section.\n",
    "        twonc_case_html: HTML of the 2NC section.\n",
    "        onear_case_html: HTML of the 1AR section. Note that it comes BEFORE\n",
    "            onenr_html here, the reverse of add_2ar_to_case.\n",
    "        onenr_html: HTML of the 1NR section.\n",
    "        two_nr_case_html: HTML of the 2NR section.\n",
    "        two_ar_case_html: HTML of the 2AR section.\n",
    "\n",
    "    Returns:\n",
    "        judge_decision_html: a <div class='judge-decision-section'> holding an <h1>\n",
    "        heading and the judge's text inside <div class='judge-rfd'>.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the chat ends without any decision text to return.\n",
    "    \"\"\"\n",
    "\n",
    "    llm_config = LLMConfig(\n",
    "        api_type=\"openai\",\n",
    "        model=\"gpt-4.1-mini\",\n",
    "        api_key=OPENAI_API_KEY,\n",
    "        # NOTE(review): 2.0 is the top of the range the OpenAI API accepts and twice\n",
    "        # the 1.0 used by add_2ar_to_case; confirm this much randomness is intended\n",
    "        temperature=2.0,\n",
    "        top_p=0.9,\n",
    "        parallel_tool_calls=None\n",
    "    )\n",
    "\n",
    "    # Judge: Reads the round and writes a detailed RFD and decision\n",
    "    judge_agent = ConversableAgent(\n",
    "        name=\"debate_judge\",\n",
    "        system_message=(\n",
    "            \"You are a highly experienced policy debate judge. \"\n",
    "            \"You have just listened to a full debate round, including the 1AC, 1NC, 2AC, 2NC, 1AR, 1NR, 2NR, and 2AR. \"\n",
    "            \"You must act as unbiasedly as possible, judging only the arguments and evidence presented in the round. \"\n",
    "            \"Be open to voting for positions or arguments that you might personally disagree with, if the debaters for that side made more compelling arguments and won the relevant issues. \"\n",
    "            \"Your job is to decide who won the round (Affirmative or Negative) and write a long, detailed Reason for Decision (RFD). \"\n",
    "            \"Your RFD should:\\n\"\n",
    "            \"- Clearly state who you voted for (Affirmative or Negative) at the top\\n\"\n",
    "            \"- Explain, in detail, the most important issues in the round and how you resolved them\\n\"\n",
    "            \"- Reference specific arguments and speeches (including the 2AR, 2NR, and 1NR)\\n\"\n",
    "            \"- Flow the round and explain how you evaluated the arguments and evidence\\n\"\n",
    "            \"- Use debate jargon and structure as a real judge would\\n\"\n",
    "            \"- Be realistic, thorough, and educational for the debaters\\n\"\n",
    "            \"- Do not simply summarize the speeches—explain your decision process and why you voted the way you did\"\n",
    "            \"\\nIMPORTANT: Output your RFD in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Judge Coach: sends the round context that starts the chat. The selector\n",
    "    # below never picks it to speak, so its review prompt is currently unused.\n",
    "    judge_coach_agent = ConversableAgent(\n",
    "        name=\"judge_coach\",\n",
    "        system_message=(\n",
    "            \"You are a debate coach and judge trainer. \"\n",
    "            \"Your job is to review the judge's RFD and provide detailed, constructive feedback for improvement. \"\n",
    "            \"Focus on:\\n\"\n",
    "            \"- Clarity: Is the decision and RFD clear and easy to follow?\\n\"\n",
    "            \"- Argument evaluation: Did the judge accurately flow and resolve the key issues?\\n\"\n",
    "            \"- Educational value: Does the RFD help debaters understand what happened and how to improve?\\n\"\n",
    "            \"- Realism: Does the RFD sound like a real, high-level debate judge?\\n\"\n",
    "            \"Suggest specific improvements, then ask the judge to revise the RFD accordingly.\"\n",
    "        ),\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    from autogen import GroupChat\n",
    "\n",
    "    # Speaker selection: single pass, the judge answers the opening message once\n",
    "    def speaker_selection_func(last_speaker, groupchat):\n",
    "        # The coach sent the context: the judge writes the RFD\n",
    "        if last_speaker is judge_coach_agent:\n",
    "            return judge_agent\n",
    "        # Anyone else (i.e. the judge) just spoke: None tells the group chat to stop\n",
    "        return None\n",
    "\n",
    "    group_chat = GroupChat(\n",
    "        agents=[judge_agent, judge_coach_agent],\n",
    "        messages=[],\n",
    "        max_round=2,\n",
    "        speaker_selection_method=speaker_selection_func\n",
    "    )\n",
    "\n",
    "    group_chat_manager = GroupChatManager(\n",
    "        groupchat=group_chat,\n",
    "        llm_config=llm_config,\n",
    "    )\n",
    "\n",
    "    # Compose the context for the judge\n",
    "    context_message = (\n",
    "        f\"{debate_case}\\n\\n\"\n",
    "        f\"Negative Case (1NC):\\n{negative_case_html}\\n\\n\"\n",
    "        f\"2AC Speech and Cards:\\n{twoac_case_html}\\n\\n\"\n",
    "        f\"2NC Speech and Cards:\\n{twonc_case_html}\\n\\n\"\n",
    "        f\"1AR Speech and Cards:\\n{onear_case_html}\\n\\n\"\n",
    "        f\"1NR Speech and Cards:\\n{onenr_html}\\n\\n\"\n",
    "        f\"2NR Speech and Cards:\\n{two_nr_case_html}\\n\\n\"\n",
    "        f\"2AR Speech and Cards:\\n{two_ar_case_html}\\n\\n\"\n",
    "        \"Assume that the current year is 2022.\\n\"\n",
    "        \"You are the judge of this debate round. \"\n",
    "        \"You must act as unbiasedly as possible, judging only the arguments and evidence presented in the round. \"\n",
    "        \"Be open to voting for positions or arguments that you might personally disagree with, if the debaters for that side made more compelling arguments and won the relevant issues. \"\n",
    "        \"Write your decision: clearly state who you voted for (Affirmative or Negative) and then write a long, detailed Reason for Decision (RFD) explaining how you evaluated the round, which arguments you found most important, and why you voted the way you did. \"\n",
    "        \"Reference specific arguments and speeches, and use debate structure and jargon. \"\n",
    "        \"Be realistic and thorough.\"\n",
    "        \"\\nIMPORTANT: Output your RFD in HTML format, using <div>, <h2>, <b>, <ul>, <li>, <p>, and similar tags as appropriate. Do NOT use <pre>, <code>, or markdown formatting. The output should visually match the style of the input card HTML formats.\"\n",
    "    )\n",
    "\n",
    "    # Start the group chat\n",
    "    chat_result = judge_coach_agent.initiate_chat(\n",
    "        group_chat_manager,\n",
    "        message=context_message,\n",
    "    )\n",
    "\n",
    "    # Take the judge's last non-empty message. Blindly using the final history\n",
    "    # entry would return the whole round context as the RFD if the judge never replied.\n",
    "    history = chat_result.chat_history\n",
    "    decisions = [\n",
    "        entry[\"content\"]\n",
    "        for entry in history\n",
    "        if entry.get(\"name\") == judge_agent.name and entry.get(\"content\")\n",
    "    ]\n",
    "    if decisions:\n",
    "        rfd = decisions[-1]\n",
    "    elif len(history) > 1 and history[-1].get(\"content\"):\n",
    "        # History entries carry no usable speaker name: fall back to the last reply\n",
    "        rfd = history[-1][\"content\"]\n",
    "    else:\n",
    "        raise RuntimeError(\"Judge produced no decision; rerun this cell to try again.\")\n",
    "\n",
    "    # Format the judge decision as HTML\n",
    "    judge_decision_html = \"\\n<div class='judge-decision-section'>\\n\"\n",
    "    judge_decision_html += \"<h1>Judge Decision and RFD</h1>\\n\"\n",
    "    judge_decision_html += f\"<div class='judge-rfd'>{rfd}</div>\\n\"\n",
    "    judge_decision_html += \"</div>\\n\"\n",
    "\n",
    "    return judge_decision_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ask the judge for a decision; keyword arguments make the 1AR/1NR order explicit.\n",
    "judge_html = judge_decision_on_round(\n",
    "    debate_case=debate_case,\n",
    "    negative_case_html=negative_case_html,\n",
    "    twoac_case_html=new_ac_case,\n",
    "    twonc_case_html=two_nc_html_full,\n",
    "    onear_case_html=one_ar_html,\n",
    "    onenr_html=onenr_html,\n",
    "    two_nr_case_html=two_nr_html,\n",
    "    two_ar_case_html=two_ar_html,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the judge's decision section as formatted HTML in the cell output.\n",
    "display(HTML(judge_html))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the raw judge-decision HTML string (the markup itself rather than the rendered page).\n",
    "judge_html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tell AgentOps to end the session, passing 'Success' as the argument.\n",
    "agentops.end_session(\"Success\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
