{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate the environments"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### generate-action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generation.py ByteSized32/data/experiment_action.csv \\\n",
    "--output-folder results/bytes32/code \\\n",
    "--model gpt-4.1-mini "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generate_agentic_world_model.py ByteSized32/data/experiment_action.csv \\\n",
    "--output-folder results/bytes32/agentic_world_model \\\n",
    "--model gpt-4.1-mini"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### generate-object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generation.py ByteSized32/data/experiment_object.csv \\\n",
    "--output-folder results/bytes32/code \\\n",
    "--model gpt-4.1-mini"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generate_agentic_world_model.py ByteSized32/data/experiment_object.csv \\\n",
    "--output-folder results/bytes32/agentic_world_model \\\n",
    "--model gpt-4.1-mini"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### generate-distractor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generation.py ByteSized32/data/experiment_distractor.csv \\\n",
    "--output-folder results/bytes32/code \\\n",
    "--model gpt-4.1-mini \\\n",
    "--zero-shot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_generate_agentic_world_model.py ByteSized32/data/experiment_distractor.csv \\\n",
    "--output-folder results/bytes32/agentic_world_model \\\n",
    "--model gpt-4.1-mini"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Perform Code Reflection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This reflection step is run only for the baseline."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_reflection.py \\\n",
    "--game-folder results/bytes32/code  \\\n",
    "--revision-folder results/bytes32/revised_games/ \\\n",
    "--max-reflection-steps 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_evaluation.py \\\n",
    "--game-folder results/bytes32/text2world \\\n",
    "--results-file \"results/bytes32/text2world/eval_text2world_results.json\" \\\n",
    "--compliance-majority-vote 11 \\\n",
    "--skip-check-alignment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/run_code_evaluation.py \\\n",
    "--game-folder results/bytes32/agentic_world_model \\\n",
    "--results-file \"results/bytes32/eval_agentic_results.json\" \\\n",
    "--compliance-majority-vote 11"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
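    "### Summary tables of the final metrics\n",
    "\n",
    "**Note:** some cells below read `results/bytes32/eval_code_results.json`, but no evaluation cell in this notebook writes that file (they write `results/bytes32/text2world/eval_text2world_results.json` and `results/bytes32/eval_agentic_results.json`). Make sure the baseline evaluation has produced `eval_code_results.json` before running them."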
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/make_table2.py  \\\n",
    "--results results/bytes32/eval_code_results.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/make_table3.py  \\\n",
    "--results results/bytes32/eval_code_results.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/make_table2.py  \\\n",
    "--results results/bytes32/eval_agentic_results.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/scripts/make_table3.py  \\\n",
    "--results results/bytes32/eval_agentic_results.json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analysis\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python ByteSized32/analysis/compare_textgame.py results/bytes32/eval_agentic_results.json results/bytes32/eval_code_results.json"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "agenticworldmodel",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
