{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1b3b8a92",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import time\n",
    "import os\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "from pathlib import Path\n",
    "\n",
    "import anthropic\n",
    "from anthropic.types.message_create_params import MessageCreateParamsNonStreaming\n",
    "from anthropic.types.messages.batch_create_params import Request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "68c26329",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load .env file from the parent directory\n",
    "env_path = \".env\"\n",
    "load_dotenv(dotenv_path=env_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "570e3432",
   "metadata": {},
   "source": [
    "# CLAUDE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "306aa63e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12288.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "8192 * 1.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "4b9c5306",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "API key found: True\n",
      "API key starts with: sk-ant-api...\n"
     ]
    }
   ],
   "source": [
    "# Debug: Check if API key is set\n",
    "api_key = os.environ.get(\"ANTHROPIC_API_KEY\") or os.environ.get(\"CLAUDE_API_KEY\")\n",
    "print(f\"API key found: {bool(api_key)}\")\n",
    "if api_key:\n",
    "    print(f\"API key starts with: {api_key[:10]}...\")\n",
    "\n",
    "anthropic_client = anthropic.Anthropic(\n",
    "    api_key=api_key,\n",
    ")\n",
    "\n",
    "# Updated for Claude models\n",
    "anthropic_models_list = [\n",
    "    (12288, \"claude-sonnet-4-20250514\")\n",
    "]\n",
    "\n",
    "base_input_path = \"/home/rodionfa/FloorplanQA/qa_jsonl/{dataset}/{room_type}/{model_id}_{max_tokens}.jsonl\"\n",
    "base_output_path = \"/home/rodionfa/FloorplanQA/qa_response/{dataset}/{room_type}/{model_id}_{max_tokens}.jsonl\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "f67bef3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# room_types = [\"hssd_data\"]\n",
    "room_types = [\"living_rooms\", \"bedrooms\", \"kitchens\", \"hssd_data_simplified\"]\n",
    "# room_types = [\"kitchens\", \"hssd_data_simplified\"]\n",
    "# room_types = [\"kitchens\", \"hssd_data_simplified\"]\n",
    "# datasets = [\"max_box\", \"free_space\", \"pair_distance\"]\n",
    "datasets = [\n",
    "    \"shortest_path\",\n",
    "    \"obstruction\",\n",
    "    # \"view_angle\",\n",
    "    # \"max_box\",\n",
    "    # \"free_space\",\n",
    "    # \"pair_distance\",\n",
    "    # \"placement\",\n",
    "    # \"repositioning\",\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "5043aaa2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/shortest_path/living_rooms/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/shortest_path/bedrooms/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/shortest_path/kitchens/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/shortest_path/hssd_data_simplified/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/obstruction/living_rooms/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/obstruction/bedrooms/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/obstruction/kitchens/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n",
      "Processing /home/rodionfa/FloorplanQA/qa_jsonl/obstruction/hssd_data_simplified/claude-sonnet-4-20250514_12288.jsonl for model claude-sonnet-4-20250514 with max_tokens 12288\n"
     ]
    }
   ],
   "source": [
    "anthropic_batches = []\n",
    "\n",
    "for max_tokens, model_id in anthropic_models_list:\n",
    "    for dataset in datasets:\n",
    "        for room_type in room_types:\n",
    "            file_jsonl = base_input_path.format(dataset=dataset, room_type=room_type, model_id=model_id, max_tokens=max_tokens)\n",
    "            print(f\"Processing {file_jsonl} for model {model_id} with max_tokens {max_tokens}\")\n",
    "            # Read all requests from the JSONL file and convert to Anthropic format\n",
    "            requests = []\n",
    "            with open(file_jsonl, 'r') as f:\n",
    "                for line_num, line in enumerate(f):\n",
    "                    data = json.loads(line.strip())\n",
    "                    \n",
    "                    # Convert OpenAI format to Anthropic format\n",
    "                    openai_body = data['body']\n",
    "                    \n",
    "                    # Verify first request format before conversion\n",
    "                    if line_num == 0 and len(openai_body['messages']) > 1:\n",
    "                        assert 'openings' in openai_body['messages'][-1]['content']\n",
    "\n",
    "                    # Extract system message and convert to Anthropic format\n",
    "                    messages = openai_body['messages']\n",
    "                    system_content = None\n",
    "                    user_assistant_messages = []\n",
    "                    \n",
    "                    for msg in messages:\n",
    "                        if msg['role'] == 'system':\n",
    "                            system_content = msg['content']\n",
    "                        else:\n",
    "                            user_assistant_messages.append(msg)\n",
    "                    \n",
    "                    # Create properly typed Request object\n",
    "                    params = {\n",
    "                        \"model\": model_id,\n",
    "                        \"max_tokens\": openai_body['max_tokens'],\n",
    "                        \"messages\": user_assistant_messages,\n",
    "                        \"temperature\": openai_body.get('temperature', 0),\n",
    "                    }\n",
    "                    \n",
    "                    # Add system parameter if system message exists\n",
    "                    if system_content:\n",
    "                        params[\"system\"] = system_content\n",
    "                    \n",
    "                    anthropic_request = Request(\n",
    "                        custom_id=data['custom_id'],\n",
    "                        params=MessageCreateParamsNonStreaming(**params)\n",
    "                    )\n",
    "                    requests.append(anthropic_request)\n",
    "\n",
    "            # Create batch with list of typed Request objects\n",
    "            batch = anthropic_client.messages.batches.create(\n",
    "                requests=requests\n",
    "            )\n",
    "            \n",
    "            anthropic_batches.append(batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "23c6af92",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MessageBatch(id='msgbatch_01H3EEDQDqftD5y7pgFq2wYR', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 18, 235296, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 51, 8, 36085, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 18, 235296, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=0, succeeded=600), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01H3EEDQDqftD5y7pgFq2wYR/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_01BKLTVNodig1mtS2AarLdPS', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 20, 311819, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 54, 34, 199258, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 20, 311819, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=1, expired=0, processing=0, succeeded=599), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01BKLTVNodig1mtS2AarLdPS/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_01EZ3bif9L7dzTKTHjS3iDU2', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 22, 201048, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 50, 50, 610950, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 22, 201048, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=0, succeeded=600), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01EZ3bif9L7dzTKTHjS3iDU2/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_01MWDbf3Cpre4vZzec7zrZWs', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 23, 606051, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 50, 42, 127909, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 23, 606051, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=0, succeeded=200), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01MWDbf3Cpre4vZzec7zrZWs/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_019ygfbQhADmKQyRqNkNASu3', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 25, 268571, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 56, 21, 809306, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 25, 268571, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=1, expired=0, processing=0, succeeded=599), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_019ygfbQhADmKQyRqNkNASu3/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_01HssStow58k55dCrtg5QZr5', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 26, 846692, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 57, 10, 108219, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 26, 846692, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=6, expired=0, processing=0, succeeded=594), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01HssStow58k55dCrtg5QZr5/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_01QavfjBZpUewydn9haduhvx', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 28, 482208, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 54, 31, 384456, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 28, 482208, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=1, expired=0, processing=0, succeeded=599), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_01QavfjBZpUewydn9haduhvx/results', type='message_batch')\n",
      "MessageBatch(id='msgbatch_011DT4HRaE1hh16q4P97vvfq', archived_at=None, cancel_initiated_at=None, created_at=datetime.datetime(2025, 9, 23, 6, 48, 29, 711179, tzinfo=datetime.timezone.utc), ended_at=datetime.datetime(2025, 9, 23, 6, 50, 54, 149391, tzinfo=TzInfo(UTC)), expires_at=datetime.datetime(2025, 9, 24, 6, 48, 29, 711179, tzinfo=datetime.timezone.utc), processing_status='ended', request_counts=MessageBatchRequestCounts(canceled=0, errored=0, expired=0, processing=0, succeeded=200), results_url='https://api.anthropic.com/v1/messages/batches/msgbatch_011DT4HRaE1hh16q4P97vvfq/results', type='message_batch')\n"
     ]
    }
   ],
   "source": [
    "for batch in anthropic_batches:\n",
    "    batch_info = anthropic_client.messages.batches.retrieve(batch.id)\n",
    "    print(batch_info)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "f8ed2ab5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results for batch msgbatch_01H3EEDQDqftD5y7pgFq2wYR written to /home/rodionfa/FloorplanQA/qa_response/shortest_path/living_rooms/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_01BKLTVNodig1mtS2AarLdPS written to /home/rodionfa/FloorplanQA/qa_response/shortest_path/bedrooms/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_01EZ3bif9L7dzTKTHjS3iDU2 written to /home/rodionfa/FloorplanQA/qa_response/shortest_path/kitchens/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_01MWDbf3Cpre4vZzec7zrZWs written to /home/rodionfa/FloorplanQA/qa_response/shortest_path/hssd_data_simplified/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_019ygfbQhADmKQyRqNkNASu3 written to /home/rodionfa/FloorplanQA/qa_response/obstruction/living_rooms/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_01HssStow58k55dCrtg5QZr5 written to /home/rodionfa/FloorplanQA/qa_response/obstruction/bedrooms/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_01QavfjBZpUewydn9haduhvx written to /home/rodionfa/FloorplanQA/qa_response/obstruction/kitchens/claude-sonnet-4-20250514_12288.jsonl\n",
      "Results for batch msgbatch_011DT4HRaE1hh16q4P97vvfq written to /home/rodionfa/FloorplanQA/qa_response/obstruction/hssd_data_simplified/claude-sonnet-4-20250514_12288.jsonl\n"
     ]
    }
   ],
   "source": [
    "index = 0\n",
    "for max_tokens, model_id in anthropic_models_list:\n",
    "    for dataset in datasets:\n",
    "        for room_type in room_types:\n",
    "        \n",
    "            batch = anthropic_batches[index]\n",
    "            \n",
    "            batch_info = anthropic_client.messages.batches.retrieve(batch.id)\n",
    "            \n",
    "            # Only download if batch processing is ended\n",
    "            if batch_info.processing_status == \"ended\":\n",
    "                # Use the newer results method\n",
    "                result_stream = anthropic_client.messages.batches.results(batch.id)\n",
    "                \n",
    "                output_jsonl = base_output_path.format(dataset=dataset, room_type=room_type, model_id=model_id, max_tokens=max_tokens)\n",
    "                output_jsonl = Path(output_jsonl)\n",
    "                output_jsonl.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "                with open(output_jsonl, \"w\") as f:\n",
    "                    for entry in result_stream:\n",
    "                        json.dump(entry.model_dump(), f)\n",
    "                        f.write(\"\\n\")\n",
    "                print(f\"Results for batch {batch.id} written to {output_jsonl}\")\n",
    "            else:\n",
    "                print(f\"Batch {batch.id} not completed yet. Status: {batch_info.processing_status}\")\n",
    "            \n",
    "            index += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a67698d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1384fc98",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "plan_b",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
