{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!pip install lifelines"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Atr45BQAX4JV",
        "outputId": "414a0421-788d-42c6-d259-3df88f17766d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting lifelines\n",
            "  Downloading lifelines-0.30.0-py3-none-any.whl.metadata (3.2 kB)\n",
            "Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.12/dist-packages (from lifelines) (2.0.2)\n",
            "Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.12/dist-packages (from lifelines) (1.16.1)\n",
            "Requirement already satisfied: pandas>=2.1 in /usr/local/lib/python3.12/dist-packages (from lifelines) (2.2.2)\n",
            "Requirement already satisfied: matplotlib>=3.0 in /usr/local/lib/python3.12/dist-packages (from lifelines) (3.10.0)\n",
            "Requirement already satisfied: autograd>=1.5 in /usr/local/lib/python3.12/dist-packages (from lifelines) (1.8.0)\n",
            "Collecting autograd-gamma>=0.3 (from lifelines)\n",
            "  Downloading autograd-gamma-0.5.0.tar.gz (4.0 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting formulaic>=0.2.2 (from lifelines)\n",
            "  Downloading formulaic-1.2.0-py3-none-any.whl.metadata (7.0 kB)\n",
            "Collecting interface-meta>=1.2.0 (from formulaic>=0.2.2->lifelines)\n",
            "  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)\n",
            "Requirement already satisfied: narwhals>=1.17 in /usr/local/lib/python3.12/dist-packages (from formulaic>=0.2.2->lifelines) (2.1.2)\n",
            "Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.12/dist-packages (from formulaic>=0.2.2->lifelines) (4.14.1)\n",
            "Requirement already satisfied: wrapt>=1.0 in /usr/local/lib/python3.12/dist-packages (from formulaic>=0.2.2->lifelines) (1.17.3)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (1.3.3)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (0.12.1)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (4.59.1)\n",
            "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (1.4.9)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (25.0)\n",
            "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (11.3.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (3.2.3)\n",
            "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.0->lifelines) (2.9.0.post0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.1->lifelines) (2025.2)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.1->lifelines) (2025.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib>=3.0->lifelines) (1.17.0)\n",
            "Downloading lifelines-0.30.0-py3-none-any.whl (349 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m349.3/349.3 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading formulaic-1.2.0-py3-none-any.whl (117 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.2/117.2 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading interface_meta-1.3.0-py3-none-any.whl (14 kB)\n",
            "Building wheels for collected packages: autograd-gamma\n",
            "  Building wheel for autograd-gamma (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for autograd-gamma: filename=autograd_gamma-0.5.0-py3-none-any.whl size=4030 sha256=5385cda9216dc6c3e738e2e1eec93e465b0aa4d01859e0fd8d2a99c999587951\n",
            "  Stored in directory: /root/.cache/pip/wheels/50/37/21/0a719b9d89c635e89ff24bd93b862882ad675279552013b2fb\n",
            "Successfully built autograd-gamma\n",
            "Installing collected packages: interface-meta, autograd-gamma, formulaic, lifelines\n",
            "Successfully installed autograd-gamma-0.5.0 formulaic-1.2.0 interface-meta-1.3.0 lifelines-0.30.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DCAy8nneXp70",
        "outputId": "836e0fd4-d0b1-4449-b708-dcb3807718da"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "# =========================================\n",
        "# MIMIC-IV: AKI causal inference with Notes (Batch API version)\n",
        "# =========================================\n",
        "from __future__ import annotations\n",
        "import os, re, json, math\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "from pathlib import Path\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from dotenv import load_dotenv\n",
        "from lifelines import CoxPHFitter\n",
        "\n",
        "# --- Google Drive mount (Colab) ---\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')\n",
        "drive_path = '/content/drive/MyDrive/'\n",
        "load_dotenv(drive_path + 'Colab Notebooks/env_config/.env')\n",
        "\n",
        "# --- Config ---\n",
        "pd.set_option(\"display.max_rows\", 8)\n",
        "MIMIC_DIR = Path(\"/content/drive/MyDrive/data/mimiciv/3.1/\")\n",
        "HOSP = MIMIC_DIR / \"hosp\"\n",
        "NOTE = MIMIC_DIR / \"note\"\n",
        "OUTD = MIMIC_DIR / \"results_ci\"; OUTD.mkdir(parents=True, exist_ok=True)\n",
        "READ_KW = dict(dtype_backend=\"pyarrow\", low_memory=False)\n",
        "\n",
        "RANDOM_STATE = 7\n",
        "PS_CLIP      = (1e-3, 1-1e-3)\n",
        "W_TRIM       = (0.01, 0.99)\n",
        "VPT_WINDOW_HOURS = 6\n",
        "COX_PENALIZER = 0.1\n",
        "\n",
        "# --- Regex ---\n",
        "RX_SCR_LABEL  = \"creatinine\"\n",
        "RX_SCR_FLUID  = r\"\\b(?:serum|blood)\\b\"\n",
        "RX_MGDL       = r\"\\bmg/dl\\b\"\n",
        "RX_MGL        = r\"\\bmg/l\\b\"\n",
        "RX_VANCO_SUB  = \"vancomycin\"\n",
        "RX_PTZ        = r\"(?:piperacillin|tazobactam|zosyn)\"\n",
        "RX_EMERG_SUB  = \"EMER\"\n",
        "\n",
        "# --- Confounders of interest (LLM outputs) ---\n",
        "CONFOUNDERS = [\"f_ckd_pre\",\"f_dm_pre\",\"f_hf_pre\",\"f_liver_pre\",\"f_nephrotox_pre\"]\n",
        "\n",
        "# --- LLM Config ---\n",
        "LLM_MODEL = \"gpt-4o-mini\"\n",
        "MAX_NOTE_CHARS = 15000\n",
        "LLM_TEMPERATURE = 0.0\n",
        "LLM_MAX_TOKENS = 200"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# --- Prompt template ---\n",
        "LLM_PROMPT_TEMPLATE = \"\"\"\n",
        "You are assisting a causal inference study of AKI. Exposure = vancomycin±piperacillin/tazobactam.\n",
        "Your ONLY task: read a discharge note and mark **pre-treatment** (pre-admission or at presentation) risk factors.\n",
        "\n",
        "Rules:\n",
        "- Consider ONLY information existing **before or at presentation** relative to index_time = {index_time_iso}.\n",
        "- DO NOT mark conditions/events clearly arising during hospitalization, hospital course, ICU interventions, inpatient treatments, or discharge meds. Those are potential colliders.\n",
        "- If timing is ambiguous, be conservative and mark 0.\n",
        "- Output a compact ONE-LINE JSON with 0/1 integers. No extra text.\n",
        "\n",
        "Binary variables (confounders of interest):\n",
        "- f_ckd_pre\n",
        "- f_dm_pre\n",
        "- f_hf_pre\n",
        "- f_liver_pre\n",
        "- f_nephrotox_pre\n",
        "\n",
        "Return ONLY:\n",
        "{{\n",
        "  \"f_ckd_pre\": 0 or 1,\n",
        "  \"f_dm_pre\": 0 or 1,\n",
        "  \"f_hf_pre\": 0 or 1,\n",
        "  \"f_liver_pre\": 0 or 1,\n",
        "  \"f_nephrotox_pre\": 0 or 1\n",
        "}}\n",
        "\n",
        "Discharge note:\n",
        "---\n",
        "{note_text}\n",
        "---\n",
        "\"\"\".strip()\n",
        "\n"
      ],
      "metadata": {
        "id": "zyDhyyE2X26k"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# =========================\n",
        "# Utils\n",
        "# =========================\n",
        "def _to_datetime_safe(s, fmt=None):\n",
        "    x = pd.to_datetime(s, errors=\"coerce\", format=fmt)\n",
        "    if x.isna().any():\n",
        "        y = pd.to_datetime(s[x.isna()], errors=\"coerce\")\n",
        "        x.loc[x.isna()] = y\n",
        "    return x\n",
        "\n",
        "def _smd(x, t, w=None):\n",
        "    x = np.asarray(x, float); t = np.asarray(t, int)\n",
        "    if w is None: w = np.ones_like(t, float)\n",
        "    if (t==1).sum()==0 or (t==0).sum()==0: return np.nan\n",
        "    m1 = np.average(x[t==1], weights=w[t==1]); m0 = np.average(x[t==0], weights=w[t==0])\n",
        "    v1 = np.average((x[t==1]-m1)**2, weights=w[t==1]); v0 = np.average((x[t==0]-m0)**2, weights=w[t==0])\n",
        "    return (m1-m0)/np.sqrt((v1+v0)/2 + 1e-9)\n",
        "\n",
        "def evalue_from_hr(hr, lcl, ucl):\n",
        "    def _ev(x: float) -> float:\n",
        "        return x + math.sqrt(max(x,0)*(max(x,0)-1.0)) if x>1 else 1.0\n",
        "    return (_ev(float(hr)), _ev(float(lcl)) if float(lcl)>1 else 1.0)\n",
        "\n",
        "# =========================\n",
        "# Cohort, labs, AKI, events\n",
        "# =========================\n",
        "def build_cohort(hosp: Path) -> pd.DataFrame:\n",
        "    use_cols = [\"subject_id\",\"hadm_id\",\"starttime\",\"stoptime\",\"drug\"]\n",
        "    it = pd.read_csv(hosp/\"prescriptions.csv.gz\",\n",
        "                     usecols=lambda c: c in use_cols,\n",
        "                     chunksize=500_000, low_memory=False)\n",
        "    v_chunks, p_chunks = [], []\n",
        "    for ch in it:\n",
        "        ch[\"starttime\"] = _to_datetime_safe(ch[\"starttime\"])\n",
        "        dlow = ch[\"drug\"].astype(\"string\").str.lower()\n",
        "        v = ch[dlow.str.contains(RX_VANCO_SUB, na=False, regex=False)]\n",
        "        p = ch[dlow.str.contains(RX_PTZ, na=False, regex=True)]\n",
        "        v_chunks.append(v[[\"subject_id\",\"hadm_id\",\"starttime\"]].rename(columns={\"starttime\":\"v_start\"}))\n",
        "        p_chunks.append(p[[\"subject_id\",\"hadm_id\",\"starttime\"]].rename(columns={\"starttime\":\"p_start\"}))\n",
        "\n",
        "    v1 = (pd.concat(v_chunks).sort_values(\"v_start\")\n",
        "            .groupby([\"subject_id\",\"hadm_id\"],as_index=False)\n",
        "            .agg(index_time=(\"v_start\",\"first\")))\n",
        "    p1 = (pd.concat(p_chunks).sort_values(\"p_start\")\n",
        "            .groupby([\"subject_id\",\"hadm_id\"],as_index=False)\n",
        "            .agg(ptz_time=(\"p_start\",\"first\")))\n",
        "\n",
        "    cohort = v1.merge(p1, how=\"left\", on=[\"subject_id\",\"hadm_id\"])\n",
        "    cohort[\"vpt_flag\"] = (\n",
        "        cohort[\"ptz_time\"].notna() &\n",
        "        (cohort[\"ptz_time\"] >= cohort[\"index_time\"]) &\n",
        "        (cohort[\"ptz_time\"] <= cohort[\"index_time\"]+pd.Timedelta(hours=VPT_WINDOW_HOURS))\n",
        "    ).astype(int)\n",
        "    print(f\"[cohort] N={len(cohort)} VPT={int(cohort['vpt_flag'].sum())}\")\n",
        "    return cohort\n",
        "\n",
        "def load_scr_itemids(hosp: Path) -> list[int]:\n",
        "    d_lab = pd.read_csv(hosp/\"d_labitems.csv.gz\", **READ_KW)\n",
        "    mask = (\n",
        "        d_lab[\"label\"].astype(\"string\").str.contains(RX_SCR_LABEL, na=False, regex=False, case=False) &\n",
        "        d_lab[\"fluid\"].astype(\"string\").str.contains(RX_SCR_FLUID, na=False, regex=True, case=False)\n",
        "    )\n",
        "    ids = d_lab.loc[mask,\"itemid\"].dropna().astype(int).unique().tolist()\n",
        "    if not ids:\n",
        "        ids = d_lab[d_lab[\"label\"].astype(\"string\").str.contains(RX_SCR_LABEL, case=False)][\"itemid\"].astype(int).unique().tolist()\n",
        "    print(f\"[scr ids] K={len(ids)} sample={ids[:3]}\")\n",
        "    return ids\n",
        "\n",
        "# ... (SCr timeseries, label_aki, build_event_times 구현 동일; 생략 가능)\n",
        "\n",
        "# =========================\n",
        "# Notes → Batch JSONL\n",
        "# =========================\n",
        "def read_discharge_safe(note_dir: Path) -> pd.DataFrame:\n",
        "    raw = pd.read_csv(note_dir/\"discharge.csv.gz\", **READ_KW)\n",
        "    df = raw.rename(columns={c:c.lower() for c in raw.columns})\n",
        "    df = df[[\"subject_id\",\"hadm_id\",\"text\"]].copy()\n",
        "    df[\"text\"] = df[\"text\"].astype(\"string[python]\").fillna(\"\")\n",
        "    return df\n",
        "\n",
        "def make_batch_jsonl_exporter(df_notes, index_time_map, out_jsonl):\n",
        "    with open(out_jsonl,\"w\",encoding=\"utf-8\") as f:\n",
        "        for (sid,hid), g in df_notes.groupby([\"subject_id\",\"hadm_id\"]):\n",
        "            text = \"\\n\".join(g[\"text\"].astype(str).tolist())\n",
        "            excerpt = text[:MAX_NOTE_CHARS]\n",
        "            it = index_time_map.get((int(sid),int(hid)))\n",
        "            it_iso = pd.to_datetime(it).tz_localize(\"UTC\").isoformat() if pd.notna(it) else \"UNKNOWN\"\n",
        "            prompt = LLM_PROMPT_TEMPLATE.format(index_time_iso=it_iso, note_text=excerpt)\n",
        "            req = {\n",
        "                \"custom_id\": f\"{sid}_{hid}\",\n",
        "                \"method\":\"POST\",\n",
        "                \"url\":\"/v1/chat/completions\",\n",
        "                \"body\":{\n",
        "                    \"model\":LLM_MODEL,\n",
        "                    \"temperature\":LLM_TEMPERATURE,\n",
        "                    \"max_tokens\":LLM_MAX_TOKENS,\n",
        "                    \"response_format\":{\"type\":\"json_object\"},\n",
        "                    \"messages\":[\n",
        "                        {\"role\":\"system\",\"content\":\"Return strict JSON only.\"},\n",
        "                        {\"role\":\"user\",\"content\":prompt}\n",
        "                    ]\n",
        "                }\n",
        "            }\n",
        "            f.write(json.dumps(req)+\"\\n\")\n",
        "    print(f\"[batch-export] wrote {out_jsonl}\")\n",
        "\n",
        "def parse_batch_results(result_jsonl, out_csv):\n",
        "    rows=[]\n",
        "    with open(result_jsonl,\"r\") as f:\n",
        "        for line in f:\n",
        "            job=json.loads(line)\n",
        "            sid,hid=job[\"custom_id\"].split(\"_\")\n",
        "            try:\n",
        "                content=job[\"response\"][\"body\"][\"choices\"][0][\"message\"][\"content\"]\n",
        "                feats=json.loads(content)\n",
        "            except Exception:\n",
        "                feats={k:0 for k in CONFOUNDERS}\n",
        "            row={\"subject_id\":int(sid),\"hadm_id\":int(hid)}\n",
        "            row.update({k:int(feats.get(k,0)) for k in CONFOUNDERS})\n",
        "            rows.append(row)\n",
        "    pd.DataFrame(rows).to_csv(out_csv,index=False)\n",
        "    print(f\"[batch-parse] {len(rows)} notes → {out_csv}\")\n",
        "    return pd.DataFrame(rows)\n",
        "\n",
        "# =========================\n",
        "# PS + Cox\n",
        "# =========================\n",
        "def fit_ps_and_sw(dfc,covs):\n",
        "    X=dfc[covs].copy().fillna(dfc[covs].median())\n",
        "    T=dfc[\"vpt_flag\"].astype(int)\n",
        "    ps_model=LogisticRegression(max_iter=400,solver=\"lbfgs\")\n",
        "    ps_model.fit(X,T)\n",
        "    ps=np.clip(ps_model.predict_proba(X)[:,1],*PS_CLIP)\n",
        "    sw=np.where(T==1,T.mean()/ps,(1-T.mean())/(1-ps))\n",
        "    lo,hi=np.quantile(sw,W_TRIM); sw=np.clip(sw,lo,hi)\n",
        "    return sw,ps\n",
        "\n",
        "def cox_results(dfc,sw,covs):\n",
        "    d=pd.DataFrame({\n",
        "        \"time\":dfc[\"duration_days\"],\n",
        "        \"event\":dfc[\"event_observed\"].astype(int),\n",
        "        \"treat\":dfc[\"vpt_flag\"].astype(int),\n",
        "        \"sw\":sw\n",
        "    })\n",
        "    cph=CoxPHFitter(penalizer=COX_PENALIZER)\n",
        "    cph.fit(d,duration_col=\"time\",event_col=\"event\",weights_col=\"sw\",robust=True)\n",
        "    hr=float(np.exp(cph.params_[\"treat\"]))\n",
        "    lcl,ucl=np.exp(cph.confidence_intervals_.loc[\"treat\"].values)\n",
        "    return hr,lcl,ucl\n"
      ],
      "metadata": {
        "id": "CmBBCW68Xvc9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# =========================\n",
        "# Main\n",
        "# =========================\n",
        "if __name__==\"__main__\":\n",
        "    cohort=build_cohort(HOSP)\n",
        "    scr_ids=load_scr_itemids(HOSP)\n",
        "    # ... load_scr_timeseries → out=label_aki → evt=build_event_times\n",
        "    notes=read_discharge_safe(NOTE)\n",
        "    idx_map={(int(r.subject_id),int(r.hadm_id)):r.index_time for _,r in cohort.iterrows()}\n",
        "    make_batch_jsonl_exporter(notes, idx_map, OUTD/\"llm_batch_requests.jsonl\")\n",
        "    print(\"👉 Run Batch API with: !openai batches create -f results_ci/llm_batch_requests.jsonl -o results_ci/llm_batch_results.jsonl\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "u-9wseTwYdo9",
        "outputId": "c51d51d7-2d58-4f79-da6b-3f4284c0a5eb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[cohort] N=90327 VPT=7822\n",
            "[scr ids] K=3 sample=[50912, 52024, 52546]\n",
            "[batch-export] wrote /content/drive/MyDrive/data/mimiciv/3.1/results_ci/llm_batch_requests.jsonl\n",
            "👉 Run Batch API with: !openai batches create -f results_ci/llm_batch_requests.jsonl -o results_ci/llm_batch_results.jsonl\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!openai batches create \\\n",
        "  -f /content/drive/MyDrive/data/mimiciv/3.1/results_ci/llm_batch_requests.jsonl \\\n",
        "  -o /content/drive/MyDrive/data/mimiciv/3.1/results_ci/llm_batch_results.jsonl"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Jne90Js0YhAG",
        "outputId": "fe4f1973-5adf-49af-96a5-cada2fc1ee7f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "usage: openai [-h] [-v] [-b API_BASE] [-k API_KEY] [-p PROXY [PROXY ...]]\n",
            "              [-o ORGANIZATION] [-t {openai,azure}]\n",
            "              [--api-version API_VERSION] [--azure-endpoint AZURE_ENDPOINT]\n",
            "              [--azure-ad-token AZURE_AD_TOKEN] [-V]\n",
            "              {api,tools,migrate,grit} ...\n",
            "openai: error: argument {api,tools,migrate,grit}: invalid choice: 'batches' (choose from api, tools, migrate, grit)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!openai --help | grep batches"
      ],
      "metadata": {
        "id": "CvBCZWCCaKH_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "pwd"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 36
        },
        "id": "fO4QrT6XcSeW",
        "outputId": "d5efc29e-a5f3-4041-bea1-c05086fee1d0"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "'/content'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 13
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from openai import OpenAI\n",
        "import time\n",
        "import json\n",
        "import os\n",
        "from dotenv import load_dotenv\n",
        "load_dotenv(drive_path + 'Colab Notebooks/env_config/.env')\n",
        "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\") or os.getenv(\"API_KEY\")\n",
        "client = OpenAI(api_key=OPENAI_API_KEY)"
      ],
      "metadata": {
        "id": "nyoFbIWRbjaf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "input_path = \"/content/drive/MyDrive/data/mimiciv/3.1/results_ci/llm_batch_requests.jsonl\"\n",
        "out_dir = \"/content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts\"\n",
        "os.makedirs(out_dir, exist_ok=True)\n",
        "\n",
        "max_lines = 5000  # 한 파일에 넣을 요청 수 (적절히 조정)\n",
        "with open(input_path, \"r\") as f:\n",
        "    for i, chunk_start in enumerate(range(0, sum(1 for _ in open(input_path)), max_lines)):\n",
        "        f.seek(0)\n",
        "        with open(os.path.join(out_dir, f\"part_{i}.jsonl\"), \"w\") as fout:\n",
        "            for j, line in enumerate(f):\n",
        "                if j < chunk_start:\n",
        "                    continue\n",
        "                if j >= chunk_start + max_lines:\n",
        "                    break\n",
        "                fout.write(line)\n",
        "print(\"✅ 분할 완료:\", os.listdir(out_dir))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xhFm_WJJcvps",
        "outputId": "73655c46-26e3-4a8b-d853-829980bb0573"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "✅ 분할 완료: ['part_0.jsonl', 'part_1.jsonl', 'part_2.jsonl', 'part_3.jsonl', 'part_4.jsonl', 'part_5.jsonl', 'part_6.jsonl', 'part_7.jsonl', 'part_8.jsonl', 'part_9.jsonl', 'part_10.jsonl', 'part_11.jsonl', 'part_12.jsonl', 'part_13.jsonl', 'part_14.jsonl', 'part_15.jsonl', 'part_16.jsonl', 'part_17.jsonl', 'part_18.jsonl', 'part_19.jsonl', 'part_20.jsonl', 'part_21.jsonl', 'part_22.jsonl', 'part_23.jsonl', 'part_24.jsonl', 'part_25.jsonl', 'part_26.jsonl', 'part_27.jsonl', 'part_28.jsonl', 'part_29.jsonl', 'part_30.jsonl', 'part_31.jsonl', 'part_32.jsonl', 'part_33.jsonl', 'part_34.jsonl', 'part_35.jsonl', 'part_36.jsonl', 'part_37.jsonl', 'part_38.jsonl', 'part_39.jsonl', 'part_40.jsonl', 'part_41.jsonl', 'part_42.jsonl', 'part_43.jsonl', 'part_44.jsonl', 'part_45.jsonl', 'part_46.jsonl', 'part_47.jsonl', 'part_48.jsonl', 'part_49.jsonl', 'part_50.jsonl', 'part_51.jsonl', 'part_52.jsonl', 'part_53.jsonl', 'part_54.jsonl', 'part_55.jsonl', 'part_56.jsonl', 'part_57.jsonl', 'part_58.jsonl', 'part_59.jsonl', 'part_60.jsonl', 'part_61.jsonl', 'part_62.jsonl', 'part_63.jsonl', 'part_64.jsonl', 'part_65.jsonl', 'part_66.jsonl']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "files = [os.path.join(out_dir, f) for f in sorted(os.listdir(out_dir))]\n",
        "\n",
        "for fpath in files:\n",
        "    upload = client.files.create(\n",
        "        file=open(fpath, \"rb\"),\n",
        "        purpose=\"batch\"\n",
        "    )\n",
        "    file_id = upload.id\n",
        "    batch = client.batches.create(\n",
        "        input_file_id=file_id,\n",
        "        endpoint=\"/v1/chat/completions\",\n",
        "        completion_window=\"24h\"\n",
        "    )\n",
        "    print(f\"📌 Submitted batch for {fpath}: {batch.id}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EGFc0G1uc4pL",
        "outputId": "e14b1dce-ec2e-4694-b21b-06bf5d8fe5b5"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_0.jsonl: batch_68ac33277ba881908f0432ed60f473e9\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_1.jsonl: batch_68ac332e9c1481908cccb79dc614d246\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_10.jsonl: batch_68ac333bf958819095befee609025dbd\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_11.jsonl: batch_68ac3343ecf481909f92a88052dfa287\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_12.jsonl: batch_68ac334a9eb88190b0f68af9681b3631\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_13.jsonl: batch_68ac335221cc819082c0826644d6c5ee\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_14.jsonl: batch_68ac335a46588190a1eef0ef31895e78\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_15.jsonl: batch_68ac3360f2b48190908eff8ffbbbed82\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_16.jsonl: batch_68ac336759d08190abd706b621808bae\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_17.jsonl: batch_68ac336eb8448190bd5f3d31dad595a7\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_18.jsonl: batch_68ac3387d4008190933ecf075646f416\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_19.jsonl: batch_68ac338f4e208190ba112d0b6a0a0290\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_2.jsonl: batch_68ac3397ffe481908de581b2d20ac0e8\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_20.jsonl: batch_68ac33a0ec6081909972a1aaae8ffc65\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_21.jsonl: batch_68ac33a7b3808190aa420ffcadc94ab5\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_22.jsonl: batch_68ac33ae11e881908e47fa48d1cbc4fa\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_23.jsonl: batch_68ac33ba13548190aa81d0c8be4be29e\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_24.jsonl: batch_68ac33c1a7308190941d925a2e24913e\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_25.jsonl: batch_68ac33c920cc819080d15875e0db6524\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_26.jsonl: batch_68ac33d1d1c88190892bbaea710c38f4\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_27.jsonl: batch_68ac33d98458819093b82865422b4654\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_28.jsonl: batch_68ac33e315748190a3cd9bdbe8cb2af8\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_29.jsonl: batch_68ac33e80f7c8190ade9bc2f7539edbe\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_3.jsonl: batch_68ac33ecf754819087b02009fe45042f\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_30.jsonl: batch_68ac33f453d8819082383149be194b38\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_31.jsonl: batch_68ac3401a1548190afa514f96908a7e7\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_32.jsonl: batch_68ac340dbfb88190a7fe3103431dee41\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_33.jsonl: batch_68ac341af1248190a7c7164aa35aef8e\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_34.jsonl: batch_68ac342221a08190878a42ab2cd12bbc\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_35.jsonl: batch_68ac3487a6d48190a653b80c6d19a1c1\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_36.jsonl: batch_68ac348f155081909cafa36c7076bee4\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_37.jsonl: batch_68ac3497a6c081909d81ab1d12d4b6db\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_38.jsonl: batch_68ac349cf22c819080bd744b365cb165\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_39.jsonl: batch_68ac34a7800c81908ae2a21b7e115634\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_4.jsonl: batch_68ac34cd121c8190b261a1bbff0325cd\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_40.jsonl: batch_68ac34de864c8190b187fb040e34a812\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_41.jsonl: batch_68ac34e5c1ac8190a596c36de40f71b0\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_42.jsonl: batch_68ac34eb0b68819086801ce1f1c259b9\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_43.jsonl: batch_68ac34f458308190b61874153cb3ccac\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_44.jsonl: batch_68ac3500b7a48190839a59d909ea9f2c\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_45.jsonl: batch_68ac3507ee688190a251cac823bfe5d0\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_46.jsonl: batch_68ac350fa6ec8190a656264dbc0012dc\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_47.jsonl: batch_68ac35129c588190a0104a4220615ca2\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_48.jsonl: batch_68ac351dbdb48190ba270e033c58d50d\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_49.jsonl: batch_68ac3527acc88190806cc1a825c890a9\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_5.jsonl: batch_68ac354322d08190bbee31fa1f5d6d74\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_50.jsonl: batch_68ac354adc0881908962be9da3ae6e5d\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_51.jsonl: batch_68ac35583aac8190b1c851211d37969d\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_52.jsonl: batch_68ac355e77288190861d5e68e3466dcc\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_53.jsonl: batch_68ac35663b0c8190904ae58f2f27b4fe\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_54.jsonl: batch_68ac356cd1948190b9a3709068077b4e\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_55.jsonl: batch_68ac3573e8bc8190a3527034d9907bf7\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_56.jsonl: batch_68ac357dc55c8190b51652e68f0f7e26\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_57.jsonl: batch_68ac358767488190bafbe8bed657efd3\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_58.jsonl: batch_68ac35a403e48190b307119df17f3db7\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_59.jsonl: batch_68ac35b02ff8819083a5cb31843458ef\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_6.jsonl: batch_68ac35bcb42481908a219aad43814bea\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_60.jsonl: batch_68ac35c3a4e48190bf1d648602703c21\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_61.jsonl: batch_68ac35cd6b288190b92b47c9a47c70eb\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_62.jsonl: batch_68ac35dfc4e48190aca665f8511c1a0f\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_63.jsonl: batch_68ac35fae7dc8190aaec05c4c01adf5d\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_64.jsonl: batch_68ac360212848190bff9635212dd7f0b\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_65.jsonl: batch_68ac360bea808190b800d9bf96053033\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_66.jsonl: batch_68ac360f13c4819086d7deb34934489c\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_7.jsonl: batch_68ac3615edf4819083ba8353ebe312d9\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_8.jsonl: batch_68ac361f1e04819082059447e6484f5e\n",
            "📌 Submitted batch for /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_parts/part_9.jsonl: batch_68ac3626e4d48190aa35489149fa4cab\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "batch_ids = [\n",
        "    \"batch_68ac33277ba881908f0432ed60f473e9\",\n",
        "    \"batch_68ac332e9c1481908cccb79dc614d246\",\n",
        "    \"batch_68ac333bf958819095befee609025dbd\",\n",
        "    \"batch_68ac3343ecf481909f92a88052dfa287\",\n",
        "    \"batch_68ac334a9eb88190b0f68af9681b3631\",\n",
        "    \"batch_68ac335221cc819082c0826644d6c5ee\",\n",
        "    \"batch_68ac335a46588190a1eef0ef31895e78\",\n",
        "    \"batch_68ac3360f2b48190908eff8ffbbbed82\",\n",
        "    \"batch_68ac336759d08190abd706b621808bae\",\n",
        "    \"batch_68ac336eb8448190bd5f3d31dad595a7\",\n",
        "    \"batch_68ac3387d4008190933ecf075646f416\",\n",
        "    \"batch_68ac338f4e208190ba112d0b6a0a0290\",\n",
        "    \"batch_68ac3397ffe481908de581b2d20ac0e8\",\n",
        "    \"batch_68ac33a0ec6081909972a1aaae8ffc65\",\n",
        "    \"batch_68ac33a7b3808190aa420ffcadc94ab5\",\n",
        "    \"batch_68ac33ae11e881908e47fa48d1cbc4fa\",\n",
        "    \"batch_68ac33ba13548190aa81d0c8be4be29e\",\n",
        "    \"batch_68ac33c1a7308190941d925a2e24913e\",\n",
        "    \"batch_68ac33c920cc819080d15875e0db6524\",\n",
        "    \"batch_68ac33d1d1c88190892bbaea710c38f4\",\n",
        "    \"batch_68ac33d98458819093b82865422b4654\",\n",
        "    \"batch_68ac33e315748190a3cd9bdbe8cb2af8\",\n",
        "    \"batch_68ac33e80f7c8190ade9bc2f7539edbe\",\n",
        "    \"batch_68ac33ecf754819087b02009fe45042f\",\n",
        "    \"batch_68ac33f453d8819082383149be194b38\",\n",
        "    \"batch_68ac3401a1548190afa514f96908a7e7\",\n",
        "    \"batch_68ac340dbfb88190a7fe3103431dee41\",\n",
        "    \"batch_68ac341af1248190a7c7164aa35aef8e\",\n",
        "    \"batch_68ac342221a08190878a42ab2cd12bbc\",\n",
        "    \"batch_68ac3487a6d48190a653b80c6d19a1c1\",\n",
        "    \"batch_68ac348f155081909cafa36c7076bee4\",\n",
        "    \"batch_68ac3497a6c081909d81ab1d12d4b6db\",\n",
        "    \"batch_68ac349cf22c819080bd744b365cb165\",\n",
        "    \"batch_68ac34a7800c81908ae2a21b7e115634\",\n",
        "    \"batch_68ac34cd121c8190b261a1bbff0325cd\",\n",
        "    \"batch_68ac34de864c8190b187fb040e34a812\",\n",
        "    \"batch_68ac34e5c1ac8190a596c36de40f71b0\",\n",
        "    \"batch_68ac34eb0b68819086801ce1f1c259b9\",\n",
        "    \"batch_68ac34f458308190b61874153cb3ccac\",\n",
        "    \"batch_68ac3500b7a48190839a59d909ea9f2c\",\n",
        "    \"batch_68ac3507ee688190a251cac823bfe5d0\",\n",
        "    \"batch_68ac350fa6ec8190a656264dbc0012dc\",\n",
        "    \"batch_68ac35129c588190a0104a4220615ca2\",\n",
        "    \"batch_68ac351dbdb48190ba270e033c58d50d\",\n",
        "    \"batch_68ac3527acc88190806cc1a825c890a9\",\n",
        "    \"batch_68ac354322d08190bbee31fa1f5d6d74\",\n",
        "    \"batch_68ac354adc0881908962be9da3ae6e5d\",\n",
        "    \"batch_68ac35583aac8190b1c851211d37969d\",\n",
        "    \"batch_68ac355e77288190861d5e68e3466dcc\",\n",
        "    \"batch_68ac35663b0c8190904ae58f2f27b4fe\",\n",
        "    \"batch_68ac356cd1948190b9a3709068077b4e\",\n",
        "    \"batch_68ac3573e8bc8190a3527034d9907bf7\",\n",
        "    \"batch_68ac357dc55c8190b51652e68f0f7e26\",\n",
        "    \"batch_68ac358767488190bafbe8bed657efd3\",\n",
        "    \"batch_68ac35a403e48190b307119df17f3db7\",\n",
        "    \"batch_68ac35b02ff8819083a5cb31843458ef\",\n",
        "    \"batch_68ac35bcb42481908a219aad43814bea\",\n",
        "    \"batch_68ac35c3a4e48190bf1d648602703c21\",\n",
        "    \"batch_68ac35cd6b288190b92b47c9a47c70eb\",\n",
        "    \"batch_68ac35dfc4e48190aca665f8511c1a0f\",\n",
        "    \"batch_68ac35fae7dc8190aaec05c4c01adf5d\",\n",
        "    \"batch_68ac360212848190bff9635212dd7f0b\",\n",
        "    \"batch_68ac360bea808190b800d9bf96053033\",\n",
        "    \"batch_68ac360f13c4819086d7deb34934489c\",\n",
        "    \"batch_68ac3615edf4819083ba8353ebe312d9\",\n",
        "    \"batch_68ac361f1e04819082059447e6484f5e\",\n",
        "    \"batch_68ac3626e4d48190aa35489149fa4cab\",\n",
        "]\n"
      ],
      "metadata": {
        "id": "VM-bUfouvKrW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "OUTD = Path(\"/content/drive/MyDrive/data/mimiciv/3.1/results_ci\")\n",
        "RES_DIR = OUTD / \"batch_results\"; RES_DIR.mkdir(parents=True, exist_ok=True)\n",
        "\n",
        "# 2) 상태 한 번에 조회 (원하면 루프 돌려서 폴링도 가능)\n",
        "def list_statuses(batch_ids):\n",
        "    rows = []\n",
        "    for bid in batch_ids:\n",
        "        try:\n",
        "            b = client.batches.retrieve(bid)\n",
        "            rows.append({\"batch_id\": bid, \"status\": b.status})\n",
        "        except Exception as e:\n",
        "            rows.append({\"batch_id\": bid, \"status\": f\"error: {e}\"})\n",
        "    return pd.DataFrame(rows)\n",
        "\n",
        "status_df = list_statuses(batch_ids)\n",
        "print(status_df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jV_J7GNJ2SFG",
        "outputId": "25ec3308-ca2a-4557-de0c-69b8687bf25e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                                  batch_id     status\n",
            "0   batch_68ac33277ba881908f0432ed60f473e9  completed\n",
            "1   batch_68ac332e9c1481908cccb79dc614d246  completed\n",
            "2   batch_68ac333bf958819095befee609025dbd  completed\n",
            "3   batch_68ac3343ecf481909f92a88052dfa287  completed\n",
            "..                                     ...        ...\n",
            "63  batch_68ac360f13c4819086d7deb34934489c  completed\n",
            "64  batch_68ac3615edf4819083ba8353ebe312d9  completed\n",
            "65  batch_68ac361f1e04819082059447e6484f5e  completed\n",
            "66  batch_68ac3626e4d48190aa35489149fa4cab  completed\n",
            "\n",
            "[67 rows x 2 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "status_df"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 361
        },
        "id": "3R4CaqtN2ZCo",
        "outputId": "5e835e66-e3b0-4575-95a1-0a60b91eec1d"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                  batch_id     status\n",
              "0   batch_68ac33277ba881908f0432ed60f473e9  completed\n",
              "1   batch_68ac332e9c1481908cccb79dc614d246  completed\n",
              "2   batch_68ac333bf958819095befee609025dbd  completed\n",
              "3   batch_68ac3343ecf481909f92a88052dfa287  completed\n",
              "..                                     ...        ...\n",
              "63  batch_68ac360f13c4819086d7deb34934489c  completed\n",
              "64  batch_68ac3615edf4819083ba8353ebe312d9  completed\n",
              "65  batch_68ac361f1e04819082059447e6484f5e  completed\n",
              "66  batch_68ac3626e4d48190aa35489149fa4cab  completed\n",
              "\n",
              "[67 rows x 2 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-1dae9e3e-c03b-4811-9563-2a4a5c99ced6\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>batch_id</th>\n",
              "      <th>status</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>batch_68ac33277ba881908f0432ed60f473e9</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>batch_68ac332e9c1481908cccb79dc614d246</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>batch_68ac333bf958819095befee609025dbd</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>batch_68ac3343ecf481909f92a88052dfa287</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>63</th>\n",
              "      <td>batch_68ac360f13c4819086d7deb34934489c</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>64</th>\n",
              "      <td>batch_68ac3615edf4819083ba8353ebe312d9</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>65</th>\n",
              "      <td>batch_68ac361f1e04819082059447e6484f5e</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>66</th>\n",
              "      <td>batch_68ac3626e4d48190aa35489149fa4cab</td>\n",
              "      <td>completed</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>67 rows × 2 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1dae9e3e-c03b-4811-9563-2a4a5c99ced6')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-1dae9e3e-c03b-4811-9563-2a4a5c99ced6 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-1dae9e3e-c03b-4811-9563-2a4a5c99ced6');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-7541fce7-3209-42d4-b5e8-cb6f244636b9\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-7541fce7-3209-42d4-b5e8-cb6f244636b9')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-7541fce7-3209-42d4-b5e8-cb6f244636b9 button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "  <div id=\"id_724d0e03-3bcc-44d9-8b52-ac273ba495d6\">\n",
              "    <style>\n",
              "      .colab-df-generate {\n",
              "        background-color: #E8F0FE;\n",
              "        border: none;\n",
              "        border-radius: 50%;\n",
              "        cursor: pointer;\n",
              "        display: none;\n",
              "        fill: #1967D2;\n",
              "        height: 32px;\n",
              "        padding: 0 0 0 0;\n",
              "        width: 32px;\n",
              "      }\n",
              "\n",
              "      .colab-df-generate:hover {\n",
              "        background-color: #E2EBFA;\n",
              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "        fill: #174EA6;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate {\n",
              "        background-color: #3B4455;\n",
              "        fill: #D2E3FC;\n",
              "      }\n",
              "\n",
              "      [theme=dark] .colab-df-generate:hover {\n",
              "        background-color: #434B5C;\n",
              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "        fill: #FFFFFF;\n",
              "      }\n",
              "    </style>\n",
              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('status_df')\"\n",
              "            title=\"Generate code using this dataframe.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "    <script>\n",
              "      (() => {\n",
              "      const buttonEl =\n",
              "        document.querySelector('#id_724d0e03-3bcc-44d9-8b52-ac273ba495d6 button.colab-df-generate');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      buttonEl.onclick = () => {\n",
              "        google.colab.notebook.generateWithVariable('status_df');\n",
              "      }\n",
              "      })();\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "status_df",
              "summary": "{\n  \"name\": \"status_df\",\n  \"rows\": 67,\n  \"fields\": [\n    {\n      \"column\": \"batch_id\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 67,\n        \"samples\": [\n          \"batch_68ac34e5c1ac8190a596c36de40f71b0\",\n          \"batch_68ac33ba13548190aa81d0c8be4be29e\",\n          \"batch_68ac334a9eb88190b0f68af9681b3631\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"status\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 1,\n        \"samples\": [\n          \"completed\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# 3) 완료될 때까지 폴링하고, 완료된 것만 다운로드\n",
        "def wait_and_download_all(batch_ids, poll_sec=60):\n",
        "    completed, failed = set(), {}\n",
        "    outputs = {}\n",
        "    while True:\n",
        "        num_done = len(completed) + len(failed)\n",
        "        for bid in batch_ids:\n",
        "            if bid in completed or bid in failed:\n",
        "                continue\n",
        "            try:\n",
        "                b = client.batches.retrieve(bid)\n",
        "                s = b.status\n",
        "                ts = dt.datetime.now().strftime(\"%H:%M:%S\")\n",
        "                print(f\"[{ts}] {bid}: {s}\")\n",
        "                if s in (\"completed\", \"failed\", \"cancelled\", \"expired\"):\n",
        "                    if s == \"completed\":\n",
        "                        try:\n",
        "                            fid = b.output_file_id\n",
        "                            resp = client.files.content(fid)\n",
        "                            data = resp if isinstance(resp, (bytes, bytearray)) else getattr(resp, \"content\", None)\n",
        "                            if data is None:\n",
        "                                # 일부 SDK에서는 .read()가 필요\n",
        "                                data = resp.read()\n",
        "                            out_path = RES_DIR / f\"{bid}.jsonl\"\n",
        "                            with open(out_path, \"wb\") as f:\n",
        "                                f.write(data)\n",
        "                            outputs[bid] = str(out_path)\n",
        "                            completed.add(bid)\n",
        "                            print(f\" ✅ downloaded → {out_path}\")\n",
        "                        except Exception as de:\n",
        "                            failed[bid] = f\"download_error: {de}\"\n",
        "                            print(f\" ❌ download failed for {bid}: {de}\")\n",
        "                    else:\n",
        "                        failed[bid] = s\n",
        "                        print(f\" ❌ {bid} finished with status={s}\")\n",
        "            except Exception as e:\n",
        "                failed[bid] = f\"retrieve_error: {e}\"\n",
        "                print(f\" ❌ retrieve failed for {bid}: {e}\")\n",
        "\n",
        "        if len(completed) + len(failed) == len(batch_ids):\n",
        "            break\n",
        "        time.sleep(poll_sec)\n",
        "    return outputs, failed\n"
      ],
      "metadata": {
        "id": "o7jeAvNt2d4v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "status_counts = {}\n",
        "for bid in batch_ids:\n",
        "    s = client.batches.retrieve(bid).status\n",
        "    status_counts[s] = status_counts.get(s, 0) + 1\n",
        "    print(f\"{bid}: {s}\")\n",
        "\n",
        "# 완료율 계산\n",
        "n_total = len(batch_ids)\n",
        "n_done = status_counts.get(\"completed\", 0)\n",
        "print(\"\\n=== Summary ===\")\n",
        "print(\"Total:\", n_total)\n",
        "print(\"Completed:\", n_done)\n",
        "print(\"Pending :\", n_total - n_done)\n",
        "print(f\"Progress: {n_done}/{n_total} ({n_done/n_total:.1%})\")\n",
        "print(\"By status:\", status_counts)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gkLYDIVEyLsL",
        "outputId": "f87307fa-39fa-4614-a304-f52a29cefa2c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "batch_68ac33277ba881908f0432ed60f473e9: completed\n",
            "batch_68ac332e9c1481908cccb79dc614d246: completed\n",
            "batch_68ac333bf958819095befee609025dbd: completed\n",
            "batch_68ac3343ecf481909f92a88052dfa287: in_progress\n",
            "batch_68ac334a9eb88190b0f68af9681b3631: completed\n",
            "batch_68ac335221cc819082c0826644d6c5ee: completed\n",
            "batch_68ac335a46588190a1eef0ef31895e78: in_progress\n",
            "batch_68ac3360f2b48190908eff8ffbbbed82: completed\n",
            "batch_68ac336759d08190abd706b621808bae: completed\n",
            "batch_68ac336eb8448190bd5f3d31dad595a7: completed\n",
            "batch_68ac3387d4008190933ecf075646f416: completed\n",
            "batch_68ac338f4e208190ba112d0b6a0a0290: completed\n",
            "batch_68ac3397ffe481908de581b2d20ac0e8: in_progress\n",
            "batch_68ac33a0ec6081909972a1aaae8ffc65: completed\n",
            "batch_68ac33a7b3808190aa420ffcadc94ab5: completed\n",
            "batch_68ac33ae11e881908e47fa48d1cbc4fa: completed\n",
            "batch_68ac33ba13548190aa81d0c8be4be29e: completed\n",
            "batch_68ac33c1a7308190941d925a2e24913e: completed\n",
            "batch_68ac33c920cc819080d15875e0db6524: completed\n",
            "batch_68ac33d1d1c88190892bbaea710c38f4: completed\n",
            "batch_68ac33d98458819093b82865422b4654: completed\n",
            "batch_68ac33e315748190a3cd9bdbe8cb2af8: completed\n",
            "batch_68ac33e80f7c8190ade9bc2f7539edbe: completed\n",
            "batch_68ac33ecf754819087b02009fe45042f: completed\n",
            "batch_68ac33f453d8819082383149be194b38: completed\n",
            "batch_68ac3401a1548190afa514f96908a7e7: completed\n",
            "batch_68ac340dbfb88190a7fe3103431dee41: completed\n",
            "batch_68ac341af1248190a7c7164aa35aef8e: in_progress\n",
            "batch_68ac342221a08190878a42ab2cd12bbc: completed\n",
            "batch_68ac3487a6d48190a653b80c6d19a1c1: completed\n",
            "batch_68ac348f155081909cafa36c7076bee4: validating\n",
            "batch_68ac3497a6c081909d81ab1d12d4b6db: completed\n",
            "batch_68ac349cf22c819080bd744b365cb165: completed\n",
            "batch_68ac34a7800c81908ae2a21b7e115634: completed\n",
            "batch_68ac34cd121c8190b261a1bbff0325cd: completed\n",
            "batch_68ac34de864c8190b187fb040e34a812: completed\n",
            "batch_68ac34e5c1ac8190a596c36de40f71b0: completed\n",
            "batch_68ac34eb0b68819086801ce1f1c259b9: completed\n",
            "batch_68ac34f458308190b61874153cb3ccac: completed\n",
            "batch_68ac3500b7a48190839a59d909ea9f2c: completed\n",
            "batch_68ac3507ee688190a251cac823bfe5d0: completed\n",
            "batch_68ac350fa6ec8190a656264dbc0012dc: completed\n",
            "batch_68ac35129c588190a0104a4220615ca2: completed\n",
            "batch_68ac351dbdb48190ba270e033c58d50d: completed\n",
            "batch_68ac3527acc88190806cc1a825c890a9: completed\n",
            "batch_68ac354322d08190bbee31fa1f5d6d74: in_progress\n",
            "batch_68ac354adc0881908962be9da3ae6e5d: in_progress\n",
            "batch_68ac35583aac8190b1c851211d37969d: completed\n",
            "batch_68ac355e77288190861d5e68e3466dcc: completed\n",
            "batch_68ac35663b0c8190904ae58f2f27b4fe: completed\n",
            "batch_68ac356cd1948190b9a3709068077b4e: completed\n",
            "batch_68ac3573e8bc8190a3527034d9907bf7: completed\n",
            "batch_68ac357dc55c8190b51652e68f0f7e26: completed\n",
            "batch_68ac358767488190bafbe8bed657efd3: completed\n",
            "batch_68ac35a403e48190b307119df17f3db7: completed\n",
            "batch_68ac35b02ff8819083a5cb31843458ef: completed\n",
            "batch_68ac35bcb42481908a219aad43814bea: in_progress\n",
            "batch_68ac35c3a4e48190bf1d648602703c21: completed\n",
            "batch_68ac35cd6b288190b92b47c9a47c70eb: completed\n",
            "batch_68ac35dfc4e48190aca665f8511c1a0f: completed\n",
            "batch_68ac35fae7dc8190aaec05c4c01adf5d: in_progress\n",
            "batch_68ac360212848190bff9635212dd7f0b: completed\n",
            "batch_68ac360bea808190b800d9bf96053033: completed\n",
            "batch_68ac360f13c4819086d7deb34934489c: completed\n",
            "batch_68ac3615edf4819083ba8353ebe312d9: completed\n",
            "batch_68ac361f1e04819082059447e6484f5e: completed\n",
            "batch_68ac3626e4d48190aa35489149fa4cab: in_progress\n",
            "\n",
            "=== Summary ===\n",
            "Total: 67\n",
            "Completed: 57\n",
            "Pending : 10\n",
            "Progress: 57/67 (85.1%)\n",
            "By status: {'completed': 57, 'in_progress': 9, 'validating': 1}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import io\n",
        "import json\n",
        "import time\n",
        "from typing import Any, Dict, List, Optional\n",
        "\n",
        "from tqdm import tqdm\n",
        "\n",
        "# ---------------------------\n",
        "# 유틸: JSONL 한 줄을 DataFrame 행으로 파싱\n",
        "# ---------------------------\n",
        "def extract_chat_content(body: Dict[str, Any]) -> Dict[str, Any]:\n",
        "    \"\"\"chat.completions 응답 body에서 유용 필드 추출\"\"\"\n",
        "    model = body.get(\"model\")\n",
        "    usage = body.get(\"usage\") or {}\n",
        "    choices = body.get(\"choices\") or []\n",
        "\n",
        "    # 기본값\n",
        "    text = None\n",
        "    finish_reason = None\n",
        "    role = None\n",
        "\n",
        "    if choices:\n",
        "        c0 = choices[0] or {}\n",
        "        msg = c0.get(\"message\") or {}\n",
        "        role = msg.get(\"role\")\n",
        "        # OpenAI 포맷: content는 string\n",
        "        text = msg.get(\"content\")\n",
        "        finish_reason = c0.get(\"finish_reason\")\n",
        "\n",
        "    # 요청 프롬프트(있을 수도, 없을 수도)\n",
        "    # 일부 라인에는 input 필드가 없으므로 None 허용\n",
        "    # 배치 input에 custom_id를 같이 넘겼다면, req_line[\"custom_id\"]에서 가져옵니다(아래에서).\n",
        "    prompt_tokens = usage.get(\"prompt_tokens\")\n",
        "    completion_tokens = usage.get(\"completion_tokens\")\n",
        "    total_tokens = usage.get(\"total_tokens\")\n",
        "\n",
        "    return {\n",
        "        \"model\": model,\n",
        "        \"assistant_role\": role,\n",
        "        \"assistant_text\": text,\n",
        "        \"finish_reason\": finish_reason,\n",
        "        \"prompt_tokens\": prompt_tokens,\n",
        "        \"completion_tokens\": completion_tokens,\n",
        "        \"total_tokens\": total_tokens,\n",
        "        # body.get(\"id\") → chatcmpl-xxx\n",
        "        \"response_body_id\": body.get(\"id\"),\n",
        "        \"created\": body.get(\"created\"),\n",
        "        \"system_fingerprint\": body.get(\"system_fingerprint\"),\n",
        "    }\n",
        "\n",
        "def parse_jsonl_line(line_obj: Dict[str, Any], batch_id: str) -> Dict[str, Any]:\n",
        "    \"\"\"\n",
        "    배치 결과 JSONL 한 줄은 대략:\n",
        "    {\n",
        "      \"id\": \"req_xxx\",\n",
        "      \"custom_id\": \"...\",                # (옵션) 배치 입력에서 넣은 값\n",
        "      \"response\": {                      # 성공 시\n",
        "        \"status_code\": 200,\n",
        "        \"request_id\": \"req_xxx\",\n",
        "        \"body\": { ... chat.completions ... }\n",
        "      }\n",
        "      OR\n",
        "      \"error\": { \"message\": \"...\", \"code\": \"...\", ... }  # 실패 시\n",
        "    }\n",
        "    \"\"\"\n",
        "    base = {\n",
        "        \"batch_id\": batch_id,\n",
        "        \"request_id\": line_obj.get(\"id\"),\n",
        "        \"custom_id\": line_obj.get(\"custom_id\"),\n",
        "    }\n",
        "\n",
        "    if \"response\" in line_obj and isinstance(line_obj[\"response\"], dict):\n",
        "        resp = line_obj[\"response\"]\n",
        "        status_code = resp.get(\"status_code\")\n",
        "        req_id = resp.get(\"request_id\")\n",
        "        body = resp.get(\"body\") or {}\n",
        "        parsed = extract_chat_content(body)\n",
        "        base.update(\n",
        "            {\n",
        "                \"http_status\": status_code,\n",
        "                \"api_request_id\": req_id,\n",
        "                \"error\": None,\n",
        "                \"error_code\": None,\n",
        "                **parsed,\n",
        "            }\n",
        "        )\n",
        "    else:\n",
        "        # 에러 라인\n",
        "        err = line_obj.get(\"error\") or {}\n",
        "        base.update(\n",
        "            {\n",
        "                \"http_status\": None,\n",
        "                \"api_request_id\": None,\n",
        "                \"error\": err.get(\"message\"),\n",
        "                \"error_code\": err.get(\"code\"),\n",
        "                \"model\": None,\n",
        "                \"assistant_role\": None,\n",
        "                \"assistant_text\": None,\n",
        "                \"finish_reason\": None,\n",
        "                \"prompt_tokens\": None,\n",
        "                \"completion_tokens\": None,\n",
        "                \"total_tokens\": None,\n",
        "                \"response_body_id\": None,\n",
        "                \"created\": None,\n",
        "                \"system_fingerprint\": None,\n",
        "            }\n",
        "        )\n",
        "\n",
        "    return base\n",
        "\n",
        "def read_jsonl_bytes(raw_bytes: bytes) -> List[Dict[str, Any]]:\n",
        "    out = []\n",
        "    for line in io.BytesIO(raw_bytes).read().splitlines():\n",
        "        if not line:\n",
        "            continue\n",
        "        try:\n",
        "            obj = json.loads(line.decode(\"utf-8\"))\n",
        "            out.append(obj)\n",
        "        except Exception:\n",
        "            # 손상된 라인 방어\n",
        "            out.append({\"error\": {\"message\": \"JSONL parse error\"}, \"raw\": line.decode(\"utf-8\", \"ignore\")})\n",
        "    return out\n",
        "\n",
        "# ---------------------------\n",
        "# 2) 배치 순회 + 결과 수집\n",
        "# ---------------------------\n",
        "all_rows: List[Dict[str, Any]] = []\n",
        "meta_rows: List[Dict[str, Any]] = []\n",
        "\n",
        "for bid in tqdm(batch_ids, desc=\"Retrieve & download\"):\n",
        "    # 과도 호출 방지\n",
        "    time.sleep(0.05)\n",
        "\n",
        "    batch = client.batches.retrieve(bid)\n",
        "\n",
        "    meta_rows.append(\n",
        "        {\n",
        "            \"batch_id\": batch.id,\n",
        "            \"status\": batch.status,\n",
        "            \"completion_window\": batch.completion_window,\n",
        "            \"created_at\": batch.created_at,\n",
        "            \"incomplete_details\": getattr(batch, \"incomplete_details\", None),\n",
        "            \"request_counts\": getattr(batch, \"request_counts\", None),\n",
        "            \"input_file_id\": getattr(batch, \"input_file_id\", None),\n",
        "            \"output_file_id\": getattr(batch, \"output_file_id\", None),\n",
        "            \"error_file_id\": getattr(batch, \"error_file_id\", None),\n",
        "            \"finalizing_at\": getattr(batch, \"finalizing_at\", None),\n",
        "            \"completed_at\": getattr(batch, \"completed_at\", None),\n",
        "            \"expires_at\": getattr(batch, \"expires_at\", None),\n",
        "        }\n",
        "    )\n",
        "\n",
        "    if getattr(batch, \"status\", None) != \"completed\":\n",
        "        continue\n",
        "\n",
        "    output_file_id = getattr(batch, \"output_file_id\", None)\n",
        "    if not output_file_id:\n",
        "        continue\n",
        "\n",
        "    # 파일 다운로드 (SDK v1: stream 객체에 .read())\n",
        "    try:\n",
        "        stream = client.files.content(output_file_id)\n",
        "        raw = stream.read()\n",
        "    except AttributeError:\n",
        "        # 일부 버전 대응: .content 속성 보유 시\n",
        "        raw = client.files.content(output_file_id).content\n",
        "\n",
        "    # JSONL 파싱\n",
        "    lines = read_jsonl_bytes(raw)\n",
        "    for obj in lines:\n",
        "        row = parse_jsonl_line(obj, batch_id=bid)\n",
        "        all_rows.append(row)\n",
        "\n",
        "# ---------------------------\n",
        "# 3) DataFrame 생성\n",
        "# ---------------------------\n",
        "df_meta = pd.DataFrame(meta_rows)\n",
        "\n",
        "# 완료된 배치의 output 라인들을 담은 DF\n",
        "df_outputs = pd.DataFrame(all_rows)\n",
        "\n",
        "# 컬럼 순서 가독성 정리\n",
        "preferred_cols = [\n",
        "    \"batch_id\",\n",
        "    \"request_id\",\n",
        "    \"custom_id\",\n",
        "    \"http_status\",\n",
        "    \"api_request_id\",\n",
        "    \"model\",\n",
        "    \"assistant_role\",\n",
        "    \"assistant_text\",\n",
        "    \"finish_reason\",\n",
        "    \"prompt_tokens\",\n",
        "    \"completion_tokens\",\n",
        "    \"total_tokens\",\n",
        "    \"response_body_id\",\n",
        "    \"created\",\n",
        "    \"system_fingerprint\",\n",
        "    \"error\",\n",
        "    \"error_code\",\n",
        "]\n",
        "other_cols = [c for c in df_outputs.columns if c not in preferred_cols]\n",
        "df_outputs = df_outputs[preferred_cols + other_cols]\n",
        "\n",
        "print(\"=== Meta (batches) ===\")\n",
        "print(df_meta.head())\n",
        "print(\"\\n=== Outputs (merged) ===\")\n",
        "print(df_outputs.head())\n",
        "\n",
        "# 4) 필요시 저장\n",
        "df_meta.to_csv(\"batches_meta.csv\", index=False)\n",
        "df_outputs.to_csv(\"batches_outputs.csv\", index=False)\n",
        "df_outputs.to_parquet(\"batches_outputs.parquet\", index=False)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ryvdL-hfqz9E",
        "outputId": "72290199-e005-4600-b438-511c1e327481"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "Retrieve & download: 100%|██████████| 67/67 [02:57<00:00,  2.65s/it]\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "=== Meta (batches) ===\n",
            "                                 batch_id     status completion_window  \\\n",
            "0  batch_68ac33277ba881908f0432ed60f473e9  completed               24h   \n",
            "1  batch_68ac332e9c1481908cccb79dc614d246  completed               24h   \n",
            "2  batch_68ac333bf958819095befee609025dbd  completed               24h   \n",
            "3  batch_68ac3343ecf481909f92a88052dfa287  completed               24h   \n",
            "4  batch_68ac334a9eb88190b0f68af9681b3631  completed               24h   \n",
            "\n",
            "   created_at incomplete_details  \\\n",
            "0  1756115751               None   \n",
            "1  1756115758               None   \n",
            "2  1756115771               None   \n",
            "3  1756115779               None   \n",
            "4  1756115786               None   \n",
            "\n",
            "                                      request_counts  \\\n",
            "0  BatchRequestCounts(completed=5000, failed=0, t...   \n",
            "1  BatchRequestCounts(completed=5000, failed=0, t...   \n",
            "2  BatchRequestCounts(completed=5000, failed=0, t...   \n",
            "3  BatchRequestCounts(completed=5000, failed=0, t...   \n",
            "4  BatchRequestCounts(completed=5000, failed=0, t...   \n",
            "\n",
            "                 input_file_id               output_file_id error_file_id  \\\n",
            "0  file-LR3wFgp6WDJeeCsxdkYz8R  file-7BS3nnNuyPiRuJpmaFB591          None   \n",
            "1  file-DD5cEJRGDBHf6dDcVyir9U  file-8qqELpMwaSDVugRH27TwVV          None   \n",
            "2  file-E9vP4vySW56k96CLCrdrN8  file-WenZYpmPR9XWJ1CsmuUyuX          None   \n",
            "3  file-HNvd7evsPK5p129jLQDdJx  file-38U6F5QzbZJgn58ardcNS7          None   \n",
            "4  file-6KWArwdkAUXrpb4egmURmZ  file-JSPfXpfeAyvTyoa1RNgJYx          None   \n",
            "\n",
            "   finalizing_at  completed_at  expires_at  \n",
            "0     1756117245    1756117434  1756202151  \n",
            "1     1756117697    1756118694  1756202158  \n",
            "2     1756116748    1756117230  1756202171  \n",
            "3     1756178101    1756178444  1756202179  \n",
            "4     1756117167    1756117651  1756202186  \n",
            "\n",
            "=== Outputs (merged) ===\n",
            "                                 batch_id  \\\n",
            "0  batch_68ac33277ba881908f0432ed60f473e9   \n",
            "1  batch_68ac33277ba881908f0432ed60f473e9   \n",
            "2  batch_68ac33277ba881908f0432ed60f473e9   \n",
            "3  batch_68ac33277ba881908f0432ed60f473e9   \n",
            "4  batch_68ac33277ba881908f0432ed60f473e9   \n",
            "\n",
            "                                   request_id          custom_id  http_status  \\\n",
            "0  batch_req_68ac38fdfcf88190afdd4734d0178bec  10000032_22595853          200   \n",
            "1  batch_req_68ac38fdf5248190b2e9094e23f2be13  10000032_22841357          200   \n",
            "2  batch_req_68ac38fdf678819080f49e041d96fe70  10000032_25742920          200   \n",
            "3  batch_req_68ac38fdf81081908d3095c4a15c7ed4  10000032_29079034          200   \n",
            "4  batch_req_68ac38fdfbc8819086cbd3b73aa5b354  10000084_23052089          200   \n",
            "\n",
            "                     api_request_id                   model assistant_role  \\\n",
            "0  09d9c1519aa33749a6511a0199adb022  gpt-4o-mini-2024-07-18      assistant   \n",
            "1  b531ba2fd53a746599a35a3e35553099  gpt-4o-mini-2024-07-18      assistant   \n",
            "2  da42a4a8fabba835d4e7f07083d4f81d  gpt-4o-mini-2024-07-18      assistant   \n",
            "3  51c0a3ea5656d56c8b1301ef64c8d531  gpt-4o-mini-2024-07-18      assistant   \n",
            "4  f57a3d858c6d76e70ab21a61781240f0  gpt-4o-mini-2024-07-18      assistant   \n",
            "\n",
            "                                      assistant_text finish_reason  \\\n",
            "0  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...          stop   \n",
            "1  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...          stop   \n",
            "2  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...          stop   \n",
            "3  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...          stop   \n",
            "4  {\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 0,\\n  \"f_h...          stop   \n",
            "\n",
            "   prompt_tokens  completion_tokens  total_tokens  \\\n",
            "0           2521                 53          2574   \n",
            "1           3424                 53          3477   \n",
            "2           3459                 53          3512   \n",
            "3           3381                 53          3434   \n",
            "4           3009                 53          3062   \n",
            "\n",
            "                         response_body_id     created system_fingerprint  \\\n",
            "0  chatcmpl-C8OLATG3pViogJE2wR9kIRhwSKeNq  1756116224      fp_51db84afab   \n",
            "1  chatcmpl-C8OLEiHJjus6hcnAFhd7SNJrXTPyO  1756116228      fp_560af6e559   \n",
            "2  chatcmpl-C8OLC3T669ISRNPs2ORkRkynz5T2a  1756116226      fp_51db84afab   \n",
            "3  chatcmpl-C8OLCjVkZAlfuw4ZCGxiLyIwQL4Jy  1756116226      fp_51db84afab   \n",
            "4  chatcmpl-C8OLEns9QpypGzSkll89ZSmRojiWk  1756116228      fp_560af6e559   \n",
            "\n",
            "  error error_code  \n",
            "0  None       None  \n",
            "1  None       None  \n",
            "2  None       None  \n",
            "3  None       None  \n",
            "4  None       None  \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "drive_path"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "051FC6h8zpb8",
        "outputId": "cb26c640-f9bd-40eb-e471-5aa641d38c69"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "PosixPath('/content/drive/MyDrive/data/mimiciv/3.1/results_ci')"
            ]
          },
          "metadata": {},
          "execution_count": 15
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "result_path = drive_path + \"data/mimiciv/3.1/results_ci/causal_inference\""
      ],
      "metadata": {
        "id": "upBDOL6ezo0I"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_meta.to_csv(result_path+\"batches_meta.csv\", index=False)\n",
        "df_outputs.to_csv(result_path+\"batches_outputs.csv\", index=False)\n",
        "df_outputs.to_parquet(result_path+\"batches_outputs.parquet\", index=False)"
      ],
      "metadata": {
        "id": "_ps7iudzyL-e"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_outputs.drop_duplicates()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 936
        },
        "id": "VSuzMshPyRc0",
        "outputId": "c1ff75b4-1ecd-4430-8d02-aa0a78b867b5"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                                      batch_id  \\\n",
              "0       batch_68ac33277ba881908f0432ed60f473e9   \n",
              "1       batch_68ac33277ba881908f0432ed60f473e9   \n",
              "2       batch_68ac33277ba881908f0432ed60f473e9   \n",
              "3       batch_68ac33277ba881908f0432ed60f473e9   \n",
              "...                                        ...   \n",
              "331789  batch_68ac3626e4d48190aa35489149fa4cab   \n",
              "331790  batch_68ac3626e4d48190aa35489149fa4cab   \n",
              "331791  batch_68ac3626e4d48190aa35489149fa4cab   \n",
              "331792  batch_68ac3626e4d48190aa35489149fa4cab   \n",
              "\n",
              "                                        request_id          custom_id  \\\n",
              "0       batch_req_68ac38fdfcf88190afdd4734d0178bec  10000032_22595853   \n",
              "1       batch_req_68ac38fdf5248190b2e9094e23f2be13  10000032_22841357   \n",
              "2       batch_req_68ac38fdf678819080f49e041d96fe70  10000032_25742920   \n",
              "3       batch_req_68ac38fdf81081908d3095c4a15c7ed4  10000032_29079034   \n",
              "...                                            ...                ...   \n",
              "331789  batch_req_68ad2805462c8190b36a2d8ac99aecb3  11533102_27365014   \n",
              "331790  batch_req_68ad28055f4881909bf6ab7417ba2c02  11533102_28939043   \n",
              "331791  batch_req_68ad28055db881908657709086a412dd  11533102_29936329   \n",
              "331792  batch_req_68ad2805aa00819083261e6a0ce5029e  11533158_26616882   \n",
              "\n",
              "        http_status                    api_request_id                   model  \\\n",
              "0               200  09d9c1519aa33749a6511a0199adb022  gpt-4o-mini-2024-07-18   \n",
              "1               200  b531ba2fd53a746599a35a3e35553099  gpt-4o-mini-2024-07-18   \n",
              "2               200  da42a4a8fabba835d4e7f07083d4f81d  gpt-4o-mini-2024-07-18   \n",
              "3               200  51c0a3ea5656d56c8b1301ef64c8d531  gpt-4o-mini-2024-07-18   \n",
              "...             ...                               ...                     ...   \n",
              "331789          200  37e5986cee5fef365601fcc839edd781  gpt-4o-mini-2024-07-18   \n",
              "331790          200  d1de20de7e1be514215bdf1b5a2da4ef  gpt-4o-mini-2024-07-18   \n",
              "331791          200  a43a83e7f1bc7675ae528b9c320a581a  gpt-4o-mini-2024-07-18   \n",
              "331792          200  85e4b8555db7c1337f276ecfb37850c7  gpt-4o-mini-2024-07-18   \n",
              "\n",
              "       assistant_role                                     assistant_text  \\\n",
              "0           assistant  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...   \n",
              "1           assistant  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...   \n",
              "2           assistant  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...   \n",
              "3           assistant  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...   \n",
              "...               ...                                                ...   \n",
              "331789      assistant  {\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...   \n",
              "331790      assistant  {\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...   \n",
              "331791      assistant  {\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...   \n",
              "331792      assistant  {\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...   \n",
              "\n",
              "       finish_reason  prompt_tokens  completion_tokens  total_tokens  \\\n",
              "0               stop           2521                 53          2574   \n",
              "1               stop           3424                 53          3477   \n",
              "2               stop           3459                 53          3512   \n",
              "3               stop           3381                 53          3434   \n",
              "...              ...            ...                ...           ...   \n",
              "331789          stop           4721                 53          4774   \n",
              "331790          stop           2819                 53          2872   \n",
              "331791          stop           4184                 53          4237   \n",
              "331792          stop           2259                 53          2312   \n",
              "\n",
              "                              response_body_id     created system_fingerprint  \\\n",
              "0       chatcmpl-C8OLATG3pViogJE2wR9kIRhwSKeNq  1756116224      fp_51db84afab   \n",
              "1       chatcmpl-C8OLEiHJjus6hcnAFhd7SNJrXTPyO  1756116228      fp_560af6e559   \n",
              "2       chatcmpl-C8OLC3T669ISRNPs2ORkRkynz5T2a  1756116226      fp_51db84afab   \n",
              "3       chatcmpl-C8OLCjVkZAlfuw4ZCGxiLyIwQL4Jy  1756116226      fp_51db84afab   \n",
              "...                                        ...         ...                ...   \n",
              "331789  chatcmpl-C8aTeQzfsobgDlgNS0KTHky2nb164  1756162878      fp_560af6e559   \n",
              "331790  chatcmpl-C8ajAmpyF9i7XW2mZfBB8JyeIpBdu  1756163840      fp_560af6e559   \n",
              "331791  chatcmpl-C8aTgCWVMemYqzETqS643rDcjrqwV  1756162880      fp_51db84afab   \n",
              "331792  chatcmpl-C8aTf0H6EiPQ93WYNbD9yNPRcNJAj  1756162879      fp_560af6e559   \n",
              "\n",
              "       error error_code  \n",
              "0       None       None  \n",
              "1       None       None  \n",
              "2       None       None  \n",
              "3       None       None  \n",
              "...      ...        ...  \n",
              "331789  None       None  \n",
              "331790  None       None  \n",
              "331791  None       None  \n",
              "331792  None       None  \n",
              "\n",
              "[331793 rows x 17 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-b91a3872-a247-430c-b20e-d9692b8d7d65\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>batch_id</th>\n",
              "      <th>request_id</th>\n",
              "      <th>custom_id</th>\n",
              "      <th>http_status</th>\n",
              "      <th>api_request_id</th>\n",
              "      <th>model</th>\n",
              "      <th>assistant_role</th>\n",
              "      <th>assistant_text</th>\n",
              "      <th>finish_reason</th>\n",
              "      <th>prompt_tokens</th>\n",
              "      <th>completion_tokens</th>\n",
              "      <th>total_tokens</th>\n",
              "      <th>response_body_id</th>\n",
              "      <th>created</th>\n",
              "      <th>system_fingerprint</th>\n",
              "      <th>error</th>\n",
              "      <th>error_code</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>batch_68ac33277ba881908f0432ed60f473e9</td>\n",
              "      <td>batch_req_68ac38fdfcf88190afdd4734d0178bec</td>\n",
              "      <td>10000032_22595853</td>\n",
              "      <td>200</td>\n",
              "      <td>09d9c1519aa33749a6511a0199adb022</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>2521</td>\n",
              "      <td>53</td>\n",
              "      <td>2574</td>\n",
              "      <td>chatcmpl-C8OLATG3pViogJE2wR9kIRhwSKeNq</td>\n",
              "      <td>1756116224</td>\n",
              "      <td>fp_51db84afab</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>batch_68ac33277ba881908f0432ed60f473e9</td>\n",
              "      <td>batch_req_68ac38fdf5248190b2e9094e23f2be13</td>\n",
              "      <td>10000032_22841357</td>\n",
              "      <td>200</td>\n",
              "      <td>b531ba2fd53a746599a35a3e35553099</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>3424</td>\n",
              "      <td>53</td>\n",
              "      <td>3477</td>\n",
              "      <td>chatcmpl-C8OLEiHJjus6hcnAFhd7SNJrXTPyO</td>\n",
              "      <td>1756116228</td>\n",
              "      <td>fp_560af6e559</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>batch_68ac33277ba881908f0432ed60f473e9</td>\n",
              "      <td>batch_req_68ac38fdf678819080f49e041d96fe70</td>\n",
              "      <td>10000032_25742920</td>\n",
              "      <td>200</td>\n",
              "      <td>da42a4a8fabba835d4e7f07083d4f81d</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>3459</td>\n",
              "      <td>53</td>\n",
              "      <td>3512</td>\n",
              "      <td>chatcmpl-C8OLC3T669ISRNPs2ORkRkynz5T2a</td>\n",
              "      <td>1756116226</td>\n",
              "      <td>fp_51db84afab</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>batch_68ac33277ba881908f0432ed60f473e9</td>\n",
              "      <td>batch_req_68ac38fdf81081908d3095c4a15c7ed4</td>\n",
              "      <td>10000032_29079034</td>\n",
              "      <td>200</td>\n",
              "      <td>51c0a3ea5656d56c8b1301ef64c8d531</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>3381</td>\n",
              "      <td>53</td>\n",
              "      <td>3434</td>\n",
              "      <td>chatcmpl-C8OLCjVkZAlfuw4ZCGxiLyIwQL4Jy</td>\n",
              "      <td>1756116226</td>\n",
              "      <td>fp_51db84afab</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>331789</th>\n",
              "      <td>batch_68ac3626e4d48190aa35489149fa4cab</td>\n",
              "      <td>batch_req_68ad2805462c8190b36a2d8ac99aecb3</td>\n",
              "      <td>11533102_27365014</td>\n",
              "      <td>200</td>\n",
              "      <td>37e5986cee5fef365601fcc839edd781</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>4721</td>\n",
              "      <td>53</td>\n",
              "      <td>4774</td>\n",
              "      <td>chatcmpl-C8aTeQzfsobgDlgNS0KTHky2nb164</td>\n",
              "      <td>1756162878</td>\n",
              "      <td>fp_560af6e559</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>331790</th>\n",
              "      <td>batch_68ac3626e4d48190aa35489149fa4cab</td>\n",
              "      <td>batch_req_68ad28055f4881909bf6ab7417ba2c02</td>\n",
              "      <td>11533102_28939043</td>\n",
              "      <td>200</td>\n",
              "      <td>d1de20de7e1be514215bdf1b5a2da4ef</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>2819</td>\n",
              "      <td>53</td>\n",
              "      <td>2872</td>\n",
              "      <td>chatcmpl-C8ajAmpyF9i7XW2mZfBB8JyeIpBdu</td>\n",
              "      <td>1756163840</td>\n",
              "      <td>fp_560af6e559</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>331791</th>\n",
              "      <td>batch_68ac3626e4d48190aa35489149fa4cab</td>\n",
              "      <td>batch_req_68ad28055db881908657709086a412dd</td>\n",
              "      <td>11533102_29936329</td>\n",
              "      <td>200</td>\n",
              "      <td>a43a83e7f1bc7675ae528b9c320a581a</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 0,\\n  \"f_dm_pre\": 1,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>4184</td>\n",
              "      <td>53</td>\n",
              "      <td>4237</td>\n",
              "      <td>chatcmpl-C8aTgCWVMemYqzETqS643rDcjrqwV</td>\n",
              "      <td>1756162880</td>\n",
              "      <td>fp_51db84afab</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>331792</th>\n",
              "      <td>batch_68ac3626e4d48190aa35489149fa4cab</td>\n",
              "      <td>batch_req_68ad2805aa00819083261e6a0ce5029e</td>\n",
              "      <td>11533158_26616882</td>\n",
              "      <td>200</td>\n",
              "      <td>85e4b8555db7c1337f276ecfb37850c7</td>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>assistant</td>\n",
              "      <td>{\\n  \"f_ckd_pre\": 1,\\n  \"f_dm_pre\": 0,\\n  \"f_h...</td>\n",
              "      <td>stop</td>\n",
              "      <td>2259</td>\n",
              "      <td>53</td>\n",
              "      <td>2312</td>\n",
              "      <td>chatcmpl-C8aTf0H6EiPQ93WYNbD9yNPRcNJAj</td>\n",
              "      <td>1756162879</td>\n",
              "      <td>fp_560af6e559</td>\n",
              "      <td>None</td>\n",
              "      <td>None</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>331793 rows × 17 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b91a3872-a247-430c-b20e-d9692b8d7d65')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-b91a3872-a247-430c-b20e-d9692b8d7d65 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-b91a3872-a247-430c-b20e-d9692b8d7d65');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    <div id=\"df-ebe414f4-7360-489b-bc1f-29082fb678db\">\n",
              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ebe414f4-7360-489b-bc1f-29082fb678db')\"\n",
              "                title=\"Suggest charts\"\n",
              "                style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "      </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "      <script>\n",
              "        async function quickchart(key) {\n",
              "          const quickchartButtonEl =\n",
              "            document.querySelector('#' + key + ' button');\n",
              "          quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "          quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "          try {\n",
              "            const charts = await google.colab.kernel.invokeFunction(\n",
              "                'suggestCharts', [key], {});\n",
              "          } catch (error) {\n",
              "            console.error('Error during call to suggestCharts:', error);\n",
              "          }\n",
              "          quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "          quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "        }\n",
              "        (() => {\n",
              "          let quickchartButtonEl =\n",
              "            document.querySelector('#df-ebe414f4-7360-489b-bc1f-29082fb678db button');\n",
              "          quickchartButtonEl.style.display =\n",
              "            google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "        })();\n",
              "      </script>\n",
              "    </div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe"
            }
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import datetime as dt\n",
        "outputs, failed = wait_and_download_all(batch_ids, poll_sec=60)\n",
        "print(\"Completed files:\", outputs)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "6NoL-wJF2m1S",
        "outputId": "ba9fdf20-8c46-4ccc-9084-7720a7b13f03"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[15:40:34] batch_68ac33277ba881908f0432ed60f473e9: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33277ba881908f0432ed60f473e9.jsonl\n",
            "[15:40:35] batch_68ac332e9c1481908cccb79dc614d246: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac332e9c1481908cccb79dc614d246.jsonl\n",
            "[15:40:36] batch_68ac333bf958819095befee609025dbd: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac333bf958819095befee609025dbd.jsonl\n",
            "[15:40:37] batch_68ac3343ecf481909f92a88052dfa287: in_progress\n",
            "[15:40:37] batch_68ac334a9eb88190b0f68af9681b3631: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac334a9eb88190b0f68af9681b3631.jsonl\n",
            "[15:40:38] batch_68ac335221cc819082c0826644d6c5ee: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac335221cc819082c0826644d6c5ee.jsonl\n",
            "[15:40:40] batch_68ac335a46588190a1eef0ef31895e78: in_progress\n",
            "[15:40:40] batch_68ac3360f2b48190908eff8ffbbbed82: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3360f2b48190908eff8ffbbbed82.jsonl\n",
            "[15:40:41] batch_68ac336759d08190abd706b621808bae: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac336759d08190abd706b621808bae.jsonl\n",
            "[15:40:42] batch_68ac336eb8448190bd5f3d31dad595a7: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac336eb8448190bd5f3d31dad595a7.jsonl\n",
            "[15:40:43] batch_68ac3387d4008190933ecf075646f416: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3387d4008190933ecf075646f416.jsonl\n",
            "[15:40:44] batch_68ac338f4e208190ba112d0b6a0a0290: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac338f4e208190ba112d0b6a0a0290.jsonl\n",
            "[15:40:45] batch_68ac3397ffe481908de581b2d20ac0e8: in_progress\n",
            "[15:40:45] batch_68ac33a0ec6081909972a1aaae8ffc65: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33a0ec6081909972a1aaae8ffc65.jsonl\n",
            "[15:40:46] batch_68ac33a7b3808190aa420ffcadc94ab5: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33a7b3808190aa420ffcadc94ab5.jsonl\n",
            "[15:40:47] batch_68ac33ae11e881908e47fa48d1cbc4fa: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33ae11e881908e47fa48d1cbc4fa.jsonl\n",
            "[15:40:48] batch_68ac33ba13548190aa81d0c8be4be29e: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33ba13548190aa81d0c8be4be29e.jsonl\n",
            "[15:40:48] batch_68ac33c1a7308190941d925a2e24913e: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33c1a7308190941d925a2e24913e.jsonl\n",
            "[15:40:50] batch_68ac33c920cc819080d15875e0db6524: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33c920cc819080d15875e0db6524.jsonl\n",
            "[15:40:50] batch_68ac33d1d1c88190892bbaea710c38f4: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33d1d1c88190892bbaea710c38f4.jsonl\n",
            "[15:40:52] batch_68ac33d98458819093b82865422b4654: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33d98458819093b82865422b4654.jsonl\n",
            "[15:40:54] batch_68ac33e315748190a3cd9bdbe8cb2af8: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33e315748190a3cd9bdbe8cb2af8.jsonl\n",
            "[15:40:55] batch_68ac33e80f7c8190ade9bc2f7539edbe: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33e80f7c8190ade9bc2f7539edbe.jsonl\n",
            "[15:40:56] batch_68ac33ecf754819087b02009fe45042f: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33ecf754819087b02009fe45042f.jsonl\n",
            "[15:40:57] batch_68ac33f453d8819082383149be194b38: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac33f453d8819082383149be194b38.jsonl\n",
            "[15:40:59] batch_68ac3401a1548190afa514f96908a7e7: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3401a1548190afa514f96908a7e7.jsonl\n",
            "[15:41:00] batch_68ac340dbfb88190a7fe3103431dee41: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac340dbfb88190a7fe3103431dee41.jsonl\n",
            "[15:41:01] batch_68ac341af1248190a7c7164aa35aef8e: in_progress\n",
            "[15:41:01] batch_68ac342221a08190878a42ab2cd12bbc: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac342221a08190878a42ab2cd12bbc.jsonl\n",
            "[15:41:03] batch_68ac3487a6d48190a653b80c6d19a1c1: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3487a6d48190a653b80c6d19a1c1.jsonl\n",
            "[15:41:04] batch_68ac348f155081909cafa36c7076bee4: validating\n",
            "[15:41:04] batch_68ac3497a6c081909d81ab1d12d4b6db: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3497a6c081909d81ab1d12d4b6db.jsonl\n",
            "[15:41:04] batch_68ac349cf22c819080bd744b365cb165: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac349cf22c819080bd744b365cb165.jsonl\n",
            "[15:41:06] batch_68ac34a7800c81908ae2a21b7e115634: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34a7800c81908ae2a21b7e115634.jsonl\n",
            "[15:41:07] batch_68ac34cd121c8190b261a1bbff0325cd: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34cd121c8190b261a1bbff0325cd.jsonl\n",
            "[15:41:08] batch_68ac34de864c8190b187fb040e34a812: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34de864c8190b187fb040e34a812.jsonl\n",
            "[15:41:09] batch_68ac34e5c1ac8190a596c36de40f71b0: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34e5c1ac8190a596c36de40f71b0.jsonl\n",
            "[15:41:10] batch_68ac34eb0b68819086801ce1f1c259b9: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34eb0b68819086801ce1f1c259b9.jsonl\n",
            "[15:41:11] batch_68ac34f458308190b61874153cb3ccac: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac34f458308190b61874153cb3ccac.jsonl\n",
            "[15:41:12] batch_68ac3500b7a48190839a59d909ea9f2c: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3500b7a48190839a59d909ea9f2c.jsonl\n",
            "[15:41:13] batch_68ac3507ee688190a251cac823bfe5d0: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3507ee688190a251cac823bfe5d0.jsonl\n",
            "[15:41:16] batch_68ac350fa6ec8190a656264dbc0012dc: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac350fa6ec8190a656264dbc0012dc.jsonl\n",
            "[15:41:17] batch_68ac35129c588190a0104a4220615ca2: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35129c588190a0104a4220615ca2.jsonl\n",
            "[15:41:17] batch_68ac351dbdb48190ba270e033c58d50d: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac351dbdb48190ba270e033c58d50d.jsonl\n",
            "[15:41:18] batch_68ac3527acc88190806cc1a825c890a9: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3527acc88190806cc1a825c890a9.jsonl\n",
            "[15:41:19] batch_68ac354322d08190bbee31fa1f5d6d74: in_progress\n",
            "[15:41:19] batch_68ac354adc0881908962be9da3ae6e5d: in_progress\n",
            "[15:41:19] batch_68ac35583aac8190b1c851211d37969d: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35583aac8190b1c851211d37969d.jsonl\n",
            "[15:41:20] batch_68ac355e77288190861d5e68e3466dcc: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac355e77288190861d5e68e3466dcc.jsonl\n",
            "[15:41:21] batch_68ac35663b0c8190904ae58f2f27b4fe: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35663b0c8190904ae58f2f27b4fe.jsonl\n",
            "[15:41:22] batch_68ac356cd1948190b9a3709068077b4e: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac356cd1948190b9a3709068077b4e.jsonl\n",
            "[15:41:23] batch_68ac3573e8bc8190a3527034d9907bf7: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac3573e8bc8190a3527034d9907bf7.jsonl\n",
            "[15:41:24] batch_68ac357dc55c8190b51652e68f0f7e26: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac357dc55c8190b51652e68f0f7e26.jsonl\n",
            "[15:41:25] batch_68ac358767488190bafbe8bed657efd3: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac358767488190bafbe8bed657efd3.jsonl\n",
            "[15:41:27] batch_68ac35a403e48190b307119df17f3db7: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35a403e48190b307119df17f3db7.jsonl\n",
            "[15:41:27] batch_68ac35b02ff8819083a5cb31843458ef: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35b02ff8819083a5cb31843458ef.jsonl\n",
            "[15:41:28] batch_68ac35bcb42481908a219aad43814bea: in_progress\n",
            "[15:41:28] batch_68ac35c3a4e48190bf1d648602703c21: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35c3a4e48190bf1d648602703c21.jsonl\n",
            "[15:41:29] batch_68ac35cd6b288190b92b47c9a47c70eb: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35cd6b288190b92b47c9a47c70eb.jsonl\n",
            "[15:41:30] batch_68ac35dfc4e48190aca665f8511c1a0f: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac35dfc4e48190aca665f8511c1a0f.jsonl\n",
            "[15:41:31] batch_68ac35fae7dc8190aaec05c4c01adf5d: in_progress\n",
            "[15:41:31] batch_68ac360212848190bff9635212dd7f0b: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac360212848190bff9635212dd7f0b.jsonl\n",
            "[15:41:32] batch_68ac360bea808190b800d9bf96053033: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac360bea808190b800d9bf96053033.jsonl\n",
            "[15:41:33] batch_68ac360f13c4819086d7deb34934489c: completed\n",
            " ✅ downloaded → /content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results/batch_68ac360f13c4819086d7deb34934489c.jsonl\n",
            "[15:41:34] batch_68ac3615edf4819083ba8353ebe312d9: completed\n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m/tmp/ipython-input-3034581286.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdatetime\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mdt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfailed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwait_and_download_all\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_ids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpoll_sec\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Completed files:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/tmp/ipython-input-815602055.py\u001b[0m in \u001b[0;36mwait_and_download_all\u001b[0;34m(batch_ids, poll_sec)\u001b[0m\n\u001b[1;32m     17\u001b[0m                         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     18\u001b[0m                             \u001b[0mfid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_file_id\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m                             \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfiles\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     20\u001b[0m                             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresp\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbytearray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresp\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"content\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m                             \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/openai/resources/files.py\u001b[0m in \u001b[0;36mcontent\u001b[0;34m(self, file_id, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m    284\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Expected a non-empty value for `file_id` but received {file_id!r}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    285\u001b[0m         \u001b[0mextra_headers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"Accept\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"application/binary\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mextra_headers\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 286\u001b[0;31m         return self._get(\n\u001b[0m\u001b[1;32m    287\u001b[0m             \u001b[0;34mf\"/files/{file_id}/content\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    288\u001b[0m             options=make_request_options(\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/openai/_base_client.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, path, cast_to, options, stream, stream_cls)\u001b[0m\n\u001b[1;32m   1203\u001b[0m         \u001b[0;31m# cast is required because mypy complains about returning Any even though\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1204\u001b[0m         \u001b[0;31m# it understands the type variables\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1205\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mResponseT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcast_to\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mopts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstream_cls\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstream_cls\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1206\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1207\u001b[0m     \u001b[0;34m@\u001b[0m\u001b[0moverload\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/openai/_base_client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, cast_to, options, stream, stream_cls)\u001b[0m\n\u001b[1;32m    980\u001b[0m             \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    981\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 982\u001b[0;31m                 response = self._client.send(\n\u001b[0m\u001b[1;32m    983\u001b[0m                     \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    984\u001b[0m                     \u001b[0mstream\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstream\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_should_stream_response_body\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpx/_client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, auth, follow_redirects)\u001b[0m\n\u001b[1;32m    912\u001b[0m         \u001b[0mauth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_build_request_auth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mauth\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    913\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 914\u001b[0;31m         response = self._send_handling_auth(\n\u001b[0m\u001b[1;32m    915\u001b[0m             \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    916\u001b[0m             \u001b[0mauth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mauth\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpx/_client.py\u001b[0m in \u001b[0;36m_send_handling_auth\u001b[0;34m(self, request, auth, follow_redirects, history)\u001b[0m\n\u001b[1;32m    940\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    941\u001b[0m             \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 942\u001b[0;31m                 response = self._send_handling_redirects(\n\u001b[0m\u001b[1;32m    943\u001b[0m                     \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    944\u001b[0m                     \u001b[0mfollow_redirects\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfollow_redirects\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpx/_client.py\u001b[0m in \u001b[0;36m_send_handling_redirects\u001b[0;34m(self, request, follow_redirects, history)\u001b[0m\n\u001b[1;32m    977\u001b[0m                 \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    978\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 979\u001b[0;31m             \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_single_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    980\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    981\u001b[0m                 \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_event_hooks\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"response\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpx/_client.py\u001b[0m in \u001b[0;36m_send_single_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m   1012\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1013\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mrequest_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1014\u001b[0;31m             \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtransport\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1015\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1016\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSyncByteStream\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpx/_transports/default.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    248\u001b[0m         )\n\u001b[1;32m    249\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mmap_httpcore_exceptions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 250\u001b[0;31m             \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pool\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    251\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    252\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtyping\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/connection_pool.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    254\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    255\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_close_connections\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mclosing\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 256\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    257\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    258\u001b[0m         \u001b[0;31m# Return the response. Note that in this case we still have to manage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/connection_pool.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    234\u001b[0m                 \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    235\u001b[0m                     \u001b[0;31m# Send the request on the assigned connection.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 236\u001b[0;31m                     response = connection.handle_request(\n\u001b[0m\u001b[1;32m    237\u001b[0m                         \u001b[0mpool_request\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    238\u001b[0m                     )\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/connection.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    101\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    102\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 103\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    104\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    105\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m_connect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mRequest\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mNetworkStream\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/http11.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    134\u001b[0m                 \u001b[0;32mwith\u001b[0m \u001b[0mTrace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"response_closed\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtrace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    135\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_response_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mexc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    138\u001b[0m     \u001b[0;31m# Sending the request...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/http11.py\u001b[0m in \u001b[0;36mhandle_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    104\u001b[0m                     \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    105\u001b[0m                     \u001b[0mtrailing_data\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 106\u001b[0;31m                 ) = self._receive_response_headers(**kwargs)\n\u001b[0m\u001b[1;32m    107\u001b[0m                 trace.return_value = (\n\u001b[1;32m    108\u001b[0m                     \u001b[0mhttp_version\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/http11.py\u001b[0m in \u001b[0;36m_receive_response_headers\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m    175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    176\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 177\u001b[0;31m             \u001b[0mevent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_receive_event\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    178\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mh11\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mResponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    179\u001b[0m                 \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_sync/http11.py\u001b[0m in \u001b[0;36m_receive_event\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    215\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    216\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mevent\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mh11\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEED_DATA\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 217\u001b[0;31m                 data = self._network_stream.read(\n\u001b[0m\u001b[1;32m    218\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mREAD_NUM_BYTES\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    219\u001b[0m                 )\n",
            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/httpcore/_backends/sync.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, max_bytes, timeout)\u001b[0m\n\u001b[1;32m    126\u001b[0m         \u001b[0;32mwith\u001b[0m \u001b[0mmap_exceptions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexc_map\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    127\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_bytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    130\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.12/ssl.py\u001b[0m in \u001b[0;36mrecv\u001b[0;34m(self, buflen, flags)\u001b[0m\n\u001b[1;32m   1230\u001b[0m                     \u001b[0;34m\"non-zero flags not allowed in calls to recv() on %s\"\u001b[0m \u001b[0;34m%\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1231\u001b[0m                     self.__class__)\n\u001b[0;32m-> 1232\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuflen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1233\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1234\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuflen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.12/ssl.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m   1103\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1104\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1105\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sslobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1106\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mSSLError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1107\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mSSL_ERROR_EOF\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from pathlib import Path\n",
        "\n",
        "results_dir = Path(\"/content/drive/MyDrive/data/mimiciv/3.1/results_ci/batch_results\")\n",
        "\n",
        "# 다운로드 완료된 파일만 골라오기\n",
        "completed_files = sorted([p for p in results_dir.glob(\"batch_*.jsonl\") if p.stat().st_size > 0])\n",
        "\n",
        "print(\"완료된 파일 개수:\", len(completed_files))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_cF_yRDLyjot",
        "outputId": "2375dd63-0d6d-4eda-da25-3711f4f1d0bd"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "완료된 파일 개수: 57\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "\n",
        "# 4) JSONL 파싱 → DataFrame (custom_id에서 subject_id/hadm_id 복구)\n",
        "def parse_jsonl_to_df(jsonl_path):\n",
        "    rows = []\n",
        "    with open(jsonl_path, \"r\", encoding=\"utf-8\") as f:\n",
        "        for line in f:\n",
        "            if not line.strip():\n",
        "                continue\n",
        "            rec = json.loads(line)\n",
        "            cid = rec.get(\"custom_id\", \"\")\n",
        "            body = (rec.get(\"response\", {}) or {}).get(\"body\", {})\n",
        "            # chat.completions 출력 케이스\n",
        "            content = None\n",
        "            if \"choices\" in body:\n",
        "                try:\n",
        "                    content = body[\"choices\"][0][\"message\"][\"content\"]\n",
        "                except Exception:\n",
        "                    content = None\n",
        "            # responses.create (Responses API) 케이스 (혹시 쓴 경우 대비)\n",
        "            if content is None and \"output\" in body:\n",
        "                try:\n",
        "                    parts = body[\"output\"]\n",
        "                    texts = [p.get(\"content\", [{}])[0].get(\"text\", {}).get(\"value\") for p in parts]\n",
        "                    content = \"\\n\".join([t for t in texts if t])\n",
        "                except Exception:\n",
        "                    content = None\n",
        "            if not content:\n",
        "                continue\n",
        "\n",
        "            # JSON만 뽑기\n",
        "            try:\n",
        "                payload = json.loads(content)\n",
        "            except Exception:\n",
        "                m = re.search(r\"\\{.*\\}\", content, re.S)\n",
        "                payload = json.loads(m.group(0)) if m else {}\n",
        "\n",
        "            # id 복구: export 시 custom_id에 hadm/sid 심어두었다고 가정\n",
        "            # 예시 패턴:\n",
        "            #  - \"hadm:22841357|sid:10000032\"\n",
        "            #  - \"sid_10000032_hadm_22841357\"\n",
        "            #  - \"hadm_22841357\"\n",
        "            sid = None; hid = None\n",
        "            m1 = re.search(r\"hadm[:_](\\d+)\", cid)\n",
        "            m2 = re.search(r\"(?:sid|subject)[:_](\\d+)\", cid)\n",
        "            if m1: hid = int(m1.group(1))\n",
        "            if m2: sid = int(m2.group(1))\n",
        "            row = {\"custom_id\": cid, \"subject_id\": sid, \"hadm_id\": hid}\n",
        "            # 관심 confounder만 안전하게 투영\n",
        "            for k in [\"f_ckd_pre\",\"f_dm_pre\",\"f_hf_pre\",\"f_liver_pre\",\"f_nephrotox_pre\"]:\n",
        "                v = payload.get(k, 0)\n",
        "                try:\n",
        "                    row[k] = int(v)\n",
        "                except Exception:\n",
        "                    row[k] = 0\n",
        "            rows.append(row)\n",
        "    df = pd.DataFrame(rows).drop_duplicates(subset=[\"subject_id\",\"hadm_id\"])\n",
        "    # 타입 정리\n",
        "    if \"subject_id\" in df: df[\"subject_id\"] = df[\"subject_id\"].astype(\"Int64\")\n",
        "    if \"hadm_id\"   in df: df[\"hadm_id\"]   = df[\"hadm_id\"].astype(\"Int64\")\n",
        "    return df\n",
        "\n",
        "def merge_all_batch_results(results_dir: Path) -> pd.DataFrame:\n",
        "    paths = sorted([str(results_dir/p) for p in os.listdir(results_dir) if p.endswith(\".jsonl\")])\n",
        "    dfs = []\n",
        "    for p in paths:\n",
        "        try:\n",
        "            dfs.append(parse_jsonl_to_df(p))\n",
        "        except Exception as e:\n",
        "            print(f\"[warn] parse failed for {p}: {e}\")\n",
        "    if not dfs:\n",
        "        return pd.DataFrame(columns=[\"subject_id\",\"hadm_id\",\"f_ckd_pre\",\"f_dm_pre\",\"f_hf_pre\",\"f_liver_pre\",\"f_nephrotox_pre\"])\n",
        "    out = pd.concat(dfs, ignore_index=True)\n",
        "    # 만약 중복 hadm_id가 남아있으면 OR(any)로 합치기\n",
        "    agg = (out.groupby([\"subject_id\",\"hadm_id\"], dropna=False)\n",
        "              [[\"f_ckd_pre\",\"f_dm_pre\",\"f_hf_pre\",\"f_liver_pre\",\"f_nephrotox_pre\"]]\n",
        "              .max()\n",
        "              .reset_index())\n",
        "    return agg"
      ],
      "metadata": {
        "id": "53Gzbxgj2kgP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "dfs = [parse_jsonl_to_df(str(p)) for p in completed_files]\n",
        "partial_df = pd.concat(dfs, ignore_index=True)\n",
        "\n",
        "# 중복 제거 및 집계\n",
        "partial_df = (\n",
        "    partial_df.groupby([\"subject_id\",\"hadm_id\"], dropna=False)\n",
        "              [[\"f_ckd_pre\",\"f_dm_pre\",\"f_hf_pre\",\"f_liver_pre\",\"f_nephrotox_pre\"]]\n",
        "              .max()\n",
        "              .reset_index()\n",
        ")\n",
        "\n",
        "print(\"Partial DF shape:\", partial_df.shape)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "OS4ygPDzttdj",
        "outputId": "2da12c5e-4da2-4c3d-842d-620f349fa140"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Partial DF shape: (1, 7)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "lkpZZ0EY7gpr"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}