{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6e6e0882-1802-43be-acef-369af527281d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def eval(dataset, model, queries=False, **kwargs):\n",
    "    \"\"\"Run `python -m evaluation.evaluator` from the parent directory.\n",
    "\n",
    "    NOTE: this name shadows the builtin `eval` for the rest of the notebook;\n",
    "    it is kept because the cells below call the helper by this name.\n",
    "\n",
    "    Args:\n",
    "        dataset: first positional CLI argument, e.g. \"AQUA@cot\".\n",
    "        model: second positional CLI argument, e.g. \"openai/text-davinci-003\".\n",
    "        queries: when truthy, the literal word `queries` is appended to the command.\n",
    "        **kwargs: each pair is forwarded to the CLI as one `key=value` token,\n",
    "            in the order given.\n",
    "\n",
    "    Prints a one-line summary (`dataset model key=value ...`) and then runs the\n",
    "    command through IPython's `!` shell escape. Returns None.\n",
    "    \"\"\"\n",
    "    import shlex\n",
    "\n",
    "    pairs = [f\"{k}={v}\" for k, v in kwargs.items()]\n",
    "    args = \" \".join(pairs)\n",
    "    print(f\"{dataset} {model} {args}\")\n",
    "\n",
    "    # Quote every token so a value containing spaces or shell metacharacters\n",
    "    # reaches the evaluator as a single argument instead of being re-split or\n",
    "    # interpreted by the shell.\n",
    "    tokens = [str(dataset), str(model), *pairs]\n",
    "    if queries:\n",
    "        tokens.append(\"queries\")\n",
    "    cli_args = \" \".join(shlex.quote(token) for token in tokens)\n",
    "    !cd .. && PYTHONPATH=evaluation python -m evaluation.evaluator $cli_args\n",
    "\n",
    "# Direct invocation of the same CLI, kept for reference (`size` must be defined before uncommenting):\n",
    "# !cd .. && PYTHONPATH=evaluation python -m evaluation.evaluator GSM8@cot openai/text-ada-001 size=$size decoder=beam_var num_beams=2 step_budget=200 prune=1.05 num_workers=1 queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2f17e44-1fef-4865-b579-cb49eb02c150",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Evaluate AQUA@cot with the argmax decoder (size=small, num_workers=8).\n",
    "eval(\n",
    "    \"AQUA@cot\",\n",
    "    \"openai/text-davinci-003\",\n",
    "    size=\"small\",\n",
    "    decoder=\"argmax\",\n",
    "    num_workers=8,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1aa3032-0d3b-48e6-ad63-cfb80ce63f54",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Evaluate AQUA@dash_multivar with the argmax decoder (step_budget=300, prune=1.05).\n",
    "eval(\n",
    "    \"AQUA@dash_multivar\",\n",
    "    \"openai/text-davinci-003\",\n",
    "    size=\"small\",\n",
    "    decoder=\"argmax\",\n",
    "    step_budget=300,\n",
    "    prune=1.05,\n",
    "    num_workers=20,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "14f5dc61-2112-455d-b7ea-1e4a94028c5f",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AQUA@dash_multivar openai/text-davinci-003 size=small decoder=var step_budget=300 n=2 b=2 num_workers=20\n",
      "Running in /home/anon/repos/multi-var-llm-tasks\n",
      "['/home/anon/repos/multi-var-llm-tasks/evaluation/evaluator.py', 'AQUA@dash_multivar', 'openai/text-davinci-003', 'size=small', 'decoder=var', 'step_budget=300', 'n=2', 'b=2', 'num_workers=20']\n",
      "Cannot find name for suite StrategyQA\n",
      "Evaluation\n",
      "  task: AQUA@dash_multivar_small\n",
      "  model: openai/text-davinci-003\n",
      "  decoder: var\n",
      "  shots: 0\n",
      "  num_workers: 20\n",
      "  suite: <class 'eval-aqua.AquaDashMultiVarCot'>\n",
      "  kwargs: {'max_length': 512, 'top1_distribution': True, 'step_budget': '300', 'n': '2', 'b': '2'}\n",
      "  size: small\n",
      "=========================================\n",
      "  0%|                                                    | 0/50 [00:00<?, ?it/s]\n",
      "OpenAI API Stats: 10 requests, 0 errors, 4902 tokens, 4.0 average batch size Cos\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 37 requests, 1 errors, 12936 tokens, 2.7837837837837838 averagFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 84 requests, 2 errors, 28066 tokens, 2.511904761904762 averageFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 85 requests, 3 errors, 28466 tokens, 2.5176470588235293 averagFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 138 requests, 4 errors, 44896 tokens, 2.347826086956522 averagFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 189 requests, 5 errors, 60724 tokens, 2.2222222222222223 averaFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 237 requests, 6 errors, 77866 tokens, 2.1772151898734178 averaFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 246 requests, 7 errors, 81789 tokens, 2.1747967479674797 avera\n",
      "OpenAI API Stats: 256 requests, 7 errors, 86257 tokens, 2.18359375 average batch\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 312 requests, 8 errors, 112460 tokens, 2.217948717948718 avera\n",
      "OpenAI API Stats: 317 requests, 8 errors, 114810 tokens, 2.220820189274448 avera\u001b[A\n",
      "OpenAI API Stats: 335 requests, 8 errors, 122587 tokens, 2.2208955223880595 aver\u001b[A\n",
      "OpenAI API Stats: 388 requests, 8 errors, 145656 tokens, 2.211340206185567 avera\u001b[A\n",
      "OpenAI API Stats: 431 requests, 8 errors, 164286 tokens, 2.1972157772621808 aver\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 479 requests, 9 errors, 186957 tokens, 2.2025052192066807 aver\n",
      "OpenAI API Stats: 485 requests, 9 errors, 190309 tokens, 2.2061855670103094 aver\u001b[A\n",
      "OpenAI API Stats: 494 requests, 9 errors, 192623 tokens, 2.1923076923076925 aver\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 510 requests, 10 errors, 202103 tokens, 2.2 average batch size\n",
      "OpenAI API Stats: 512 requests, 10 errors, 202472 tokens, 2.197265625 average ba\u001b[A\n",
      "OpenAI API Stats: 522 requests, 10 errors, 207421 tokens, 2.2088122605363987 ave\u001b[A\n",
      "OpenAI API Stats: 537 requests, 10 errors, 212335 tokens, 2.197392923649907 aver\u001b[Awarning: step budget exceeded\n",
      "OpenAI API Stats: 537 requests, 10 errors, 212335 tokens, 2.197392923649907 aver\n",
      "OpenAI API Stats: 578 requests, 10 errors, 229370 tokens, 2.207612456747405 aver\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 588 requests, 11 errors, 234096 tokens, 2.2108843537414966 ave\n",
      "OpenAI API Stats: 646 requests, 11 errors, 257575 tokens, 2.2027863777089784 ave\u001b[A\n",
      "OpenAI API Stats: 675 requests, 11 errors, 269903 tokens, 2.2 average batch size\u001b[A\n",
      "OpenAI API Stats: 681 requests, 11 errors, 272279 tokens, 2.1997063142437594 ave\u001b[A\n",
      "OpenAI API Stats: 739 requests, 11 errors, 296795 tokens, 2.201623815967524 aver\u001b[A\n",
      "OpenAI API Stats: 772 requests, 11 errors, 311923 tokens, 2.199481865284974 aver\u001b[Awarning: step budget exceeded\n",
      "OpenAI API Stats: 772 requests, 11 errors, 311923 tokens, 2.199481865284974 aver\n",
      "OpenAI API Stats: 779 requests, 11 errors, 315331 tokens, 2.1989730423620024 ave\u001b[A\n",
      "OpenAI API Stats: 781 requests, 11 errors, 315759 tokens, 2.1971830985915495 ave\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 794 requests, 12 errors, 320187 tokens, 2.1926952141057936 aveFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 794 requests, 13 errors, 320187 tokens, 2.1926952141057936 aveFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 832 requests, 14 errors, 334959 tokens, 2.1814903846153846 aveFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 844 requests, 15 errors, 340790 tokens, 2.1824644549763033 ave\n",
      "OpenAI API Stats: 846 requests, 15 errors, 341187 tokens, 2.1808510638297873 ave\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 874 requests, 16 errors, 353403 tokens, 2.179633867276888 aver\n",
      "OpenAI API Stats: 892 requests, 16 errors, 362162 tokens, 2.1827354260089686 ave\u001b[AFailed to call openai.Completion.create <class 'openai.error.ServiceUnavailableError'> The server is overloaded or not ready yet. Retrying...\n",
      "OpenAI API Stats: 945 requests, 17 errors, 387119 tokens, 2.1777777777777776 ave\n",
      "OpenAI API Stats: 948 requests, 17 errors, 388829 tokens, 2.178270042194093 aver\u001b[A\n",
      "OpenAI API Stats: 996 requests, 17 errors, 412864 tokens, 2.177710843373494 aver\u001b[A\n",
      "OpenAI API Stats: 1007 requests, 17 errors, 418372 tokens, 2.17974180734856 aver\u001b[Awarning: step budget exceeded\n",
      "OpenAI API Stats: 1007 requests, 17 errors, 418372 tokens, 2.17974180734856 aver\n",
      "OpenAI API Stats: 1059 requests, 17 errors, 435187 tokens, 2.151085930122757 ave\u001b[Awarning: step budget exceeded\n",
      "OpenAI API Stats: 1059 requests, 17 errors, 435187 tokens, 2.151085930122757 ave\n",
      "OpenAI API Stats: 1084 requests, 17 errors, 441656 tokens, 2.1374538745387452 av\u001b[A^C\n",
      " 50%|███████████████████                   | 25/50 [3:05:53<3:05:53, 446.14s/it]\n"
     ]
    }
   ],
   "source": [
    "# Evaluate AQUA@dash_multivar with the `var` decoder (step_budget=300, n=2, b=2).\n",
    "eval(\n",
    "    \"AQUA@dash_multivar\",\n",
    "    \"openai/text-davinci-003\",\n",
    "    size=\"small\",\n",
    "    decoder=\"var\",\n",
    "    step_budget=300,\n",
    "    n=2,\n",
    "    b=2,\n",
    "    num_workers=20,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "86777eaa-b04c-4474-9c86-921e8a96cdd8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AQUA@dash_multivar openai/text-davinci-003 size=small decoder=beam_var step_budget=300 num_beams=2 prune=1.05 num_workers=20\n",
      "Running in /home/anon/repos/multi-var-llm-tasks\n",
      "['/home/anon/repos/multi-var-llm-tasks/evaluation/evaluator.py', 'AQUA@dash_multivar', 'openai/text-davinci-003', 'size=small', 'decoder=beam_var', 'step_budget=300', 'num_beams=2', 'prune=1.05', 'num_workers=20']\n",
      "Cannot find name for suite StrategyQA\n",
      "Evaluation\n",
      "  task: AQUA@dash_multivar_small\n",
      "  model: openai/text-davinci-003\n",
      "  decoder: beam_var\n",
      "  shots: 0\n",
      "  num_workers: 20\n",
      "  suite: <class 'eval-aqua.AquaDashMultiVarCot'>\n",
      "  kwargs: {'max_length': 512, 'top1_distribution': True, 'step_budget': '300', 'num_beams': '2', 'prune': '1.05'}\n",
      "  size: small\n",
      "=========================================\n",
      "  0%|                                                    | 0/50 [00:00<?, ?it/s]\n",
      "  0%|                                                    | 0/50 [00:00<?, ?it/s]\u001b[Aadvanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "advanced to variable THOUGHT\n",
      "^C\n",
      "  0%|                                                    | 0/50 [00:02<?, ?it/s]\n",
      "Traceback (most recent call last):\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n",
      "    return _run_code(code, main_globals, None,\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/runpy.py\", line 86, in _run_code\n",
      "    exec(code, run_globals)\n",
      "  File \"/home/anon/repos/multi-var-llm-tasks/evaluation/evaluator.py\", line 111, in <module>\n",
      "    task.suite(size=config.get(\"size\", None)).main(**config)\n",
      "  File \"/home/anon/repos/multi-var-llm-tasks/evaluation/evaluation_suite.py\", line 309, in main\n",
      "    asyncio.run(self.eval(model, **kwargs))\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/asyncio/runners.py\", line 44, in run\n",
      "    return loop.run_until_complete(main)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/asyncio/base_events.py\", line 636, in run_until_complete\n",
      "    self.run_forever()\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/asyncio/base_events.py\", line 603, in run_forever\n",
      "    self._run_once()\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/asyncio/base_events.py\", line 1899, in _run_once\n",
      "    handle._run()\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/asyncio/events.py\", line 80, in _run\n",
      "    self._context.run(self._callback, *self._args)\n",
      "  File \"/home/anon/repos/multi-var-llm-tasks/evaluation/../lmql/lmql/runtime/bopenai/batched_openai.py\", line 475, in complete_request_worker\n",
      "    res = openai.Completion.create(**kwargs)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/openai/api_resources/completion.py\", line 25, in create\n",
      "    return super().create(*args, **kwargs)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py\", line 153, in create\n",
      "    response, _, api_key = requestor.request(\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/openai/api_requestor.py\", line 217, in request\n",
      "    result = self.request_raw(\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/openai/api_requestor.py\", line 517, in request_raw\n",
      "    result = _thread_context.session.request(\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/requests/sessions.py\", line 587, in request\n",
      "    resp = self.send(prep, **send_kwargs)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/requests/sessions.py\", line 745, in send\n",
      "    r.content\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/requests/models.py\", line 899, in content\n",
      "    self._content = b\"\".join(self.iter_content(CONTENT_CHUNK_SIZE)) or b\"\"\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/requests/models.py\", line 816, in generate\n",
      "    yield from self.raw.stream(chunk_size, decode_content=True)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/urllib3/response.py\", line 624, in stream\n",
      "    for line in self.read_chunked(amt, decode_content=decode_content):\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/urllib3/response.py\", line 828, in read_chunked\n",
      "    self._update_chunk_length()\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/site-packages/urllib3/response.py\", line 758, in _update_chunk_length\n",
      "    line = self._fp.fp.readline()\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/socket.py\", line 705, in readinto\n",
      "    return self._sock.recv_into(b)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/ssl.py\", line 1274, in recv_into\n",
      "    return self.read(nbytes, buffer)\n",
      "  File \"/home/anon/miniconda3/envs/lmql/lib/python3.10/ssl.py\", line 1130, in read\n",
      "    return self._sslobj.read(len, buffer)\n",
      "KeyboardInterrupt\n"
     ]
    }
   ],
   "source": [
    "# Evaluate AQUA@dash_multivar with the `beam_var` decoder (step_budget=300, num_beams=2, prune=1.05).\n",
    "eval(\n",
    "    \"AQUA@dash_multivar\",\n",
    "    \"openai/text-davinci-003\",\n",
    "    size=\"small\",\n",
    "    decoder=\"beam_var\",\n",
    "    step_budget=300,\n",
    "    num_beams=2,\n",
    "    prune=1.05,\n",
    "    num_workers=20,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
