{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_json_file(file_path):\n",
    "    \"\"\"Parse the JSON document at ``file_path`` and return the decoded object.\n",
    "\n",
    "    The file is opened explicitly as UTF-8 so the result does not depend on\n",
    "    the platform's locale default encoding.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path of the JSON file to read.\n",
    "\n",
    "    Returns:\n",
    "        The Python object produced by ``json.load`` (dict, list, ...).\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened.\n",
    "        json.JSONDecodeError: If the content is not valid JSON.\n",
    "    \"\"\"\n",
    "    with open(file_path, 'r', encoding='utf-8') as file:\n",
    "        return json.load(file)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the full tic-tac-toe dataset on the shared volume\n",
    "BASE_SAVE_PATH = \"/mnt/shared/data/stlm-logic\"\n",
    "dataset_path = os.path.join(BASE_SAVE_PATH, \"datasets\")\n",
    "dataset_full_path = os.path.join(BASE_SAVE_PATH, \"datasets\", \"tictactoe_dataset.json\")\n",
    "\n",
    "# Parse the JSON file into an in-memory Python object\n",
    "dataset = read_json_file(dataset_full_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5478"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'state_id': 1500,\n",
       " 'board': [1, 1, 0, 0, 2, 0, 0, 2, 0],\n",
       " 'ascii_board': 'X X .\\n. O .\\n. O .',\n",
       " 'text_instruction': 'Row 0: X, X, empty. Row 1: empty, O, empty. Row 2: empty, O, empty.',\n",
       " 'move_sequences': [[1, 14, 2, 17],\n",
       "  [1, 17, 2, 14],\n",
       "  [2, 14, 1, 17],\n",
       "  [2, 17, 1, 14]],\n",
       " 'is_terminal': False,\n",
       " 'winner': 0,\n",
       " 'next_legal_moves': [3, 4, 6, 7, 9],\n",
       " 'canonical_symmetry_id': [0, 0, 0, 1, 2, 2, 1, 0, 0],\n",
       " 'text_instruction_alt': 'Row 0: +, +, empty. Row 1: empty, Y, empty. Row 2: empty, Y, empty.',\n",
       " 'ascii_board_alt': '+ + .\\n. Y .\\n. Y .',\n",
       " 'best_moves': [3]}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset[1500]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "1\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "2\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "3\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "4\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "5\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "6\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n",
      "7\n"
     ]
    }
   ],
   "source": [
    "# Search the terminal states for one whose first recorded move token is above 10.\n",
    "# Only the first match (if any) is printed; no per-state output is produced, so the\n",
    "# cell no longer floods the notebook with one line per terminal state.\n",
    "# NOTE(review): `> 10` excludes token 10 itself -- confirm whether `>= 10` was intended.\n",
    "first_hit = None\n",
    "for d in dataset:\n",
    "    if not d['is_terminal']:\n",
    "        continue\n",
    "    move_seq = d['move_sequences'][0]\n",
    "    # Guard against an empty sequence before looking at its first token\n",
    "    if move_seq and move_seq[0] > 10:\n",
    "        first_hit = d\n",
    "        break\n",
    "\n",
    "if first_hit is None:\n",
    "    # Make the negative result explicit instead of ending silently\n",
    "    print(\"No terminal state has a first move token > 10\")\n",
    "else:\n",
    "    print(first_hit['ascii_board'])\n",
    "    print(first_hit['board'])\n",
    "    print(first_hit['is_terminal'])\n",
    "    print(first_hit['text_instruction'])\n",
    "    print(first_hit['winner'])\n",
    "\n",
    "    print(\"\\n\\n\")\n",
    "    print(first_hit)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X O X\n",
      "O X O\n",
      "X . .\n",
      "[1, 2, 1, 2, 1, 2, 1, 0, 0]\n",
      "True\n",
      "Row 0: X, O, X. Row 1: O, X, O. Row 2: X, empty, empty.\n",
      "1\n",
      "\n",
      "\n",
      "\n",
      "{'state_id': 7, 'board': [1, 2, 1, 2, 1, 2, 1, 0, 0], 'ascii_board': 'X O X\\nO X O\\nX . .', 'text_instruction': 'Row 0: X, O, X. Row 1: O, X, O. Row 2: X, empty, empty.', 'move_sequences': [[1, 11, 3, 13, 5, 15, 7], [1, 11, 3, 13, 7, 15, 5], [1, 11, 3, 15, 5, 13, 7], [1, 11, 3, 15, 7, 13, 5], [1, 11, 5, 13, 3, 15, 7], [1, 11, 5, 13, 7, 15, 3], [1, 11, 5, 15, 3, 13, 7], [1, 11, 5, 15, 7, 13, 3], [1, 11, 7, 13, 3, 15, 5], [1, 11, 7, 13, 5, 15, 3], [1, 11, 7, 15, 3, 13, 5], [1, 11, 7, 15, 5, 13, 3], [1, 13, 3, 11, 5, 15, 7], [1, 13, 3, 11, 7, 15, 5], [1, 13, 3, 15, 5, 11, 7], [1, 13, 3, 15, 7, 11, 5], [1, 13, 5, 11, 3, 15, 7], [1, 13, 5, 11, 7, 15, 3], [1, 13, 5, 15, 3, 11, 7], [1, 13, 5, 15, 7, 11, 3], [1, 13, 7, 11, 3, 15, 5], [1, 13, 7, 11, 5, 15, 3], [1, 13, 7, 15, 3, 11, 5], [1, 13, 7, 15, 5, 11, 3], [1, 15, 3, 11, 5, 13, 7], [1, 15, 3, 11, 7, 13, 5], [1, 15, 3, 13, 5, 11, 7], [1, 15, 3, 13, 7, 11, 5], [1, 15, 5, 11, 3, 13, 7], [1, 15, 5, 11, 7, 13, 3], [1, 15, 5, 13, 3, 11, 7], [1, 15, 5, 13, 7, 11, 3], [1, 15, 7, 11, 3, 13, 5], [1, 15, 7, 11, 5, 13, 3], [1, 15, 7, 13, 3, 11, 5], [1, 15, 7, 13, 5, 11, 3], [3, 11, 1, 13, 5, 15, 7], [3, 11, 1, 13, 7, 15, 5], [3, 11, 1, 15, 5, 13, 7], [3, 11, 1, 15, 7, 13, 5], [3, 11, 5, 13, 1, 15, 7], [3, 11, 5, 15, 1, 13, 7], [3, 11, 7, 13, 1, 15, 5], [3, 11, 7, 15, 1, 13, 5], [3, 13, 1, 11, 5, 15, 7], [3, 13, 1, 11, 7, 15, 5], [3, 13, 1, 15, 5, 11, 7], [3, 13, 1, 15, 7, 11, 5], [3, 13, 5, 11, 1, 15, 7], [3, 13, 5, 15, 1, 11, 7], [3, 13, 7, 11, 1, 15, 5], [3, 13, 7, 15, 1, 11, 5], [3, 15, 1, 11, 5, 13, 7], [3, 15, 1, 11, 7, 13, 5], [3, 15, 1, 13, 5, 11, 7], [3, 15, 1, 13, 7, 11, 5], [3, 15, 5, 11, 1, 13, 7], [3, 15, 5, 13, 1, 11, 7], [3, 15, 7, 11, 1, 13, 5], [3, 15, 7, 13, 1, 11, 5], [5, 11, 1, 13, 3, 15, 7], [5, 11, 1, 13, 7, 15, 3], [5, 11, 1, 15, 3, 13, 7], [5, 11, 1, 15, 7, 13, 3], [5, 11, 3, 13, 1, 15, 7], [5, 11, 3, 15, 1, 13, 7], [5, 11, 7, 13, 1, 15, 3], [5, 11, 7, 15, 1, 13, 3], [5, 13, 1, 11, 3, 15, 7], [5, 
13, 1, 11, 7, 15, 3], [5, 13, 1, 15, 3, 11, 7], [5, 13, 1, 15, 7, 11, 3], [5, 13, 3, 11, 1, 15, 7], [5, 13, 3, 15, 1, 11, 7], [5, 13, 7, 11, 1, 15, 3], [5, 13, 7, 15, 1, 11, 3], [5, 15, 1, 11, 3, 13, 7], [5, 15, 1, 11, 7, 13, 3], [5, 15, 1, 13, 3, 11, 7], [5, 15, 1, 13, 7, 11, 3], [5, 15, 3, 11, 1, 13, 7], [5, 15, 3, 13, 1, 11, 7], [5, 15, 7, 11, 1, 13, 3], [5, 15, 7, 13, 1, 11, 3], [7, 11, 1, 13, 3, 15, 5], [7, 11, 1, 13, 5, 15, 3], [7, 11, 1, 15, 3, 13, 5], [7, 11, 1, 15, 5, 13, 3], [7, 11, 3, 13, 1, 15, 5], [7, 11, 3, 15, 1, 13, 5], [7, 11, 5, 13, 1, 15, 3], [7, 11, 5, 15, 1, 13, 3], [7, 13, 1, 11, 3, 15, 5], [7, 13, 1, 11, 5, 15, 3], [7, 13, 1, 15, 3, 11, 5], [7, 13, 1, 15, 5, 11, 3], [7, 13, 3, 11, 1, 15, 5], [7, 13, 3, 15, 1, 11, 5], [7, 13, 5, 11, 1, 15, 3], [7, 13, 5, 15, 1, 11, 3], [7, 15, 1, 11, 3, 13, 5], [7, 15, 1, 11, 5, 13, 3], [7, 15, 1, 13, 3, 11, 5], [7, 15, 1, 13, 5, 11, 3], [7, 15, 3, 11, 1, 13, 5], [7, 15, 3, 13, 1, 11, 5], [7, 15, 5, 11, 1, 13, 3], [7, 15, 5, 13, 1, 11, 3]], 'is_terminal': True, 'winner': 1, 'next_legal_moves': [], 'canonical_symmetry_id': [0, 0, 1, 2, 1, 2, 1, 2, 1]}\n"
     ]
    }
   ],
   "source": [
    "# Show the first terminal state in the dataset: selected fields, then the full record\n",
    "for d in dataset:\n",
    "    if not d['is_terminal']:\n",
    "        continue\n",
    "    for field in ('ascii_board', 'board', 'is_terminal', 'text_instruction', 'winner'):\n",
    "        print(d[field])\n",
    "\n",
    "    print(\"\\n\\n\")\n",
    "    print(d)\n",
    "    break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# New experiments setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASE_DATASETS_PATH = \"/mnt/shared/data/stlm-logic/datasets\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['tictactoe_val.json', 'random_train_dataset_0.8_0.1_0.1.json', 'tictactoe_dataset.json', 'random_val_dataset_0.8_0.1_0.1.json', 'tictactoe_train.json', 'tictactoe_test.json', 'random_test_dataset_0.8_0.1_0.1.json']\n"
     ]
    }
   ],
   "source": [
    "# List the names of the entries in the datasets directory (nothing is read or parsed here)\n",
    "print(os.listdir(BASE_DATASETS_PATH))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the full tic-tac-toe dataset JSON file into a pandas DataFrame\n",
    "full_dataset= pd.read_json(os.path.join(BASE_DATASETS_PATH, 'tictactoe_dataset.json'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['state_id', 'board', 'ascii_board', 'text_instruction',\n",
       "       'move_sequences', 'is_terminal', 'winner', 'next_legal_moves',\n",
       "       'canonical_symmetry_id', 'text_instruction_alt', 'ascii_board_alt',\n",
       "       'best_moves'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_dataset.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state_id                                                               100\n",
       "board                                          [1, 2, 1, 1, 2, 2, 0, 2, 1]\n",
       "ascii_board                                            X O X\\nX O O\\n. O X\n",
       "text_instruction         Row 0: X, O, X. Row 1: X, O, O. Row 2: empty, ...\n",
       "move_sequences           [[1, 11, 3, 14, 4, 15, 9, 17], [1, 11, 3, 14, ...\n",
       "is_terminal                                                           True\n",
       "winner                                                                   2\n",
       "next_legal_moves                                                        []\n",
       "canonical_symmetry_id                          [0, 1, 1, 2, 2, 2, 1, 2, 1]\n",
       "Name: 100, dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_dataset.iloc[100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive '<column>_alt' variants of both text columns with the player symbols\n",
    "# swapped for alternates: 'X' becomes '+', 'O' becomes 'Y' (literal replacement).\n",
    "for source_col in ('text_instruction', 'ascii_board'):\n",
    "    full_dataset[f'{source_col}_alt'] = (\n",
    "        full_dataset[source_col]\n",
    "        .str.replace('X', '+', regex=False)\n",
    "        .str.replace('O', 'Y', regex=False)\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state_id</th>\n",
       "      <th>board</th>\n",
       "      <th>ascii_board</th>\n",
       "      <th>text_instruction</th>\n",
       "      <th>move_sequences</th>\n",
       "      <th>is_terminal</th>\n",
       "      <th>winner</th>\n",
       "      <th>next_legal_moves</th>\n",
       "      <th>canonical_symmetry_id</th>\n",
       "      <th>text_instruction_alt</th>\n",
       "      <th>ascii_board_alt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>[[1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[11, 12, 13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 1]</td>\n",
       "      <td>Row 0: +, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>+ . .\\n. . .\\n. . .</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>[1, 2, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>[[1, 11]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>+ Y .\\n. . .\\n. . .</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>[1, 2, 1, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>[[1, 11, 3], [3, 11, 1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>+ Y +\\n. . .\\n. . .</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>[1, 2, 1, 2, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\nO . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...</td>\n",
       "      <td>[[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...</td>\n",
       "      <td>+ Y +\\nY . .\\n. . .</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   state_id                        board          ascii_board  \\\n",
       "0         0  [0, 0, 0, 0, 0, 0, 0, 0, 0]  . . .\\n. . .\\n. . .   \n",
       "1         1  [1, 0, 0, 0, 0, 0, 0, 0, 0]  X . .\\n. . .\\n. . .   \n",
       "2         2  [1, 2, 0, 0, 0, 0, 0, 0, 0]  X O .\\n. . .\\n. . .   \n",
       "3         3  [1, 2, 1, 0, 0, 0, 0, 0, 0]  X O X\\n. . .\\n. . .   \n",
       "4         4  [1, 2, 1, 2, 0, 0, 0, 0, 0]  X O X\\nO . .\\n. . .   \n",
       "\n",
       "                                    text_instruction  \\\n",
       "0  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "1  Row 0: X, empty, empty. Row 1: empty, empty, e...   \n",
       "2  Row 0: X, O, empty. Row 1: empty, empty, empty...   \n",
       "3  Row 0: X, O, X. Row 1: empty, empty, empty. Ro...   \n",
       "4  Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...   \n",
       "\n",
       "                                      move_sequences  is_terminal  winner  \\\n",
       "0                                               [[]]        False       0   \n",
       "1                                              [[1]]        False       0   \n",
       "2                                          [[1, 11]]        False       0   \n",
       "3                           [[1, 11, 3], [3, 11, 1]]        False       0   \n",
       "4  [[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...        False       0   \n",
       "\n",
       "                   next_legal_moves        canonical_symmetry_id  \\\n",
       "0       [1, 2, 3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 0, 0]   \n",
       "1  [11, 12, 13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 0, 0, 1]   \n",
       "2             [3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "3          [13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 1, 2, 1]   \n",
       "4                   [5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 2, 1, 2, 1]   \n",
       "\n",
       "                                text_instruction_alt      ascii_board_alt  \n",
       "0  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\n. . .  \n",
       "1  Row 0: +, empty, empty. Row 1: empty, empty, e...  + . .\\n. . .\\n. . .  \n",
       "2  Row 0: +, Y, empty. Row 1: empty, empty, empty...  + Y .\\n. . .\\n. . .  \n",
       "3  Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...  + Y +\\n. . .\\n. . .  \n",
       "4  Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...  + Y +\\nY . .\\n. . .  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "def get_token_for_move(player, cell_idx):\n",
    "    \"\"\"\n",
    "    Map a (player, cell) pair onto its move-token ID.\n",
    "\n",
    "    Token layout:\n",
    "      - player 1            -> tokens 1..9\n",
    "      - any other player id -> tokens 10..18 (used for player 2)\n",
    "    cell_idx is the row-major cell position in [0..8]:\n",
    "       (0,0)->0, (0,1)->1, (0,2)->2, (1,0)->3, ...\n",
    "    \"\"\"\n",
    "    # Player 1 tokens start at 1; every other player value is offset to start at 10.\n",
    "    token_base = 1 if player == 1 else 10\n",
    "    return cell_idx + token_base\n",
    "\n",
    "def check_winner(board):\n",
    "    \"\"\"\n",
    "    Determine the outcome of a 3x3 board.\n",
    "\n",
    "    board is a length-9 list in row-major order where\n",
    "    0 = empty, 1 = P1 and 2 = P2.\n",
    "\n",
    "    Returns a (winner, is_terminal) tuple:\n",
    "      - (mark, True)  when some line holds three equal non-empty marks\n",
    "      - (0, True)     when nobody won and no cell is empty (draw)\n",
    "      - (0, False)    otherwise\n",
    "    \"\"\"\n",
    "    winning_triples = (\n",
    "        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows\n",
    "        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns\n",
    "        (0, 4, 8), (2, 4, 6),             # diagonals\n",
    "    )\n",
    "\n",
    "    for first, second, third in winning_triples:\n",
    "        mark = board[first]\n",
    "        if mark == 0:\n",
    "            continue  # an empty cell can never start a winning line\n",
    "        if mark == board[second] == board[third]:\n",
    "            return mark, True\n",
    "\n",
    "    # No line is complete: the game is over only if no empty cell remains.\n",
    "    board_is_full = 0 not in board\n",
    "    return 0, board_is_full\n",
    "\n",
    "def minimax_all_moves_depth_sensitive(board, player, depth=0, alpha=-math.inf, beta=math.inf):\n",
    "    \"\"\"\n",
    "    Depth-aware minimax with alpha-beta pruning that collects every optimal move.\n",
    "\n",
    "    Scores are from player 1's point of view: a player-1 win is worth\n",
    "    10 - depth (faster wins score higher), a player-2 win is worth\n",
    "    -10 + depth (slower losses score higher) and a draw is worth 0.\n",
    "\n",
    "    Args:\n",
    "        board: Length-9 list (0 = empty, 1 = P1, 2 = P2), row-major.\n",
    "        player: Side to move; 1 maximizes, any other value minimizes and plays mark 2.\n",
    "        depth: Number of plies already played below the root of the search.\n",
    "        alpha: Best score the maximizer is already guaranteed on this path.\n",
    "        beta: Best score the minimizer is already guaranteed on this path.\n",
    "\n",
    "    Returns:\n",
    "        (score, best_moves) where best_moves lists the cell indices (0..8)\n",
    "        that achieve score. The list is empty for terminal boards. It is\n",
    "        complete when the call uses the default full (alpha, beta) window;\n",
    "        inside the recursion it may be cut short by pruning.\n",
    "    \"\"\"\n",
    "    winner, is_terminal = check_winner(board)\n",
    "    if is_terminal:\n",
    "        if winner == 1:\n",
    "            # A win is worth 10, minus the depth. Faster win = higher score.\n",
    "            return 10 - depth, []\n",
    "        if winner == 2:\n",
    "            # A loss is worth -10, plus the depth. Slower loss = \"less bad\" score.\n",
    "            return -10 + depth, []\n",
    "        return 0, []  # A draw is always 0.\n",
    "\n",
    "    maximizing = player == 1\n",
    "    mark = 1 if maximizing else 2\n",
    "    next_player = 2 if maximizing else 1\n",
    "    best_eval = -math.inf if maximizing else math.inf\n",
    "    best_moves = []\n",
    "\n",
    "    for move in [i for i, cell in enumerate(board) if cell == 0]:\n",
    "        new_board = board[:]\n",
    "        new_board[move] = mark\n",
    "        # Increment depth for the recursive call\n",
    "        evaluation, _ = minimax_all_moves_depth_sensitive(new_board, next_player, depth + 1, alpha, beta)\n",
    "\n",
    "        is_better = evaluation > best_eval if maximizing else evaluation < best_eval\n",
    "        if is_better:\n",
    "            best_eval = evaluation\n",
    "            best_moves = [move]\n",
    "        elif evaluation == best_eval:\n",
    "            best_moves.append(move)\n",
    "\n",
    "        if maximizing:\n",
    "            alpha = max(alpha, evaluation)\n",
    "        else:\n",
    "            beta = min(beta, evaluation)\n",
    "\n",
    "        # Prune only on a STRICT crossing. Cutting off when beta == alpha would let a\n",
    "        # subtree return a mere bound that happens to equal the current best score,\n",
    "        # and the caller would then record a worse move as a tie in best_moves.\n",
    "        if beta < alpha:\n",
    "            break\n",
    "\n",
    "    return best_eval, best_moves\n",
    "\n",
    "def find_all_best_moves(row):\n",
    "    \"\"\"\n",
    "    Wrapper function to be used with pandas apply.\n",
    "    This now returns a list of all optimal moves.\n",
    "    \"\"\"\n",
    "    if row['is_terminal']:\n",
    "        return [] # Return an empty list for terminal states\n",
    "\n",
    "    # Determine whose turn it is\n",
    "    p1_moves = row['board'].count(1)\n",
    "    p2_moves = row['board'].count(2)\n",
    "    current_player = 2 if p1_moves > p2_moves else 1\n",
    "\n",
    "    _, best_move_indices = minimax_all_moves_depth_sensitive(row['board'], current_player)\n",
    "    \n",
    "    if best_move_indices:\n",
    "        # Convert all best move indices to their corresponding tokens\n",
    "        return sorted([get_token_for_move(current_player, move_idx) for move_idx in best_move_indices])\n",
    "    return []\n",
    "\n",
    "# Create a new column 'best_moves' (plural) to store the list of all optimal moves.\n",
    "full_dataset['best_moves'] = full_dataset.apply(find_all_best_moves, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state_id</th>\n",
       "      <th>board</th>\n",
       "      <th>ascii_board</th>\n",
       "      <th>text_instruction</th>\n",
       "      <th>move_sequences</th>\n",
       "      <th>is_terminal</th>\n",
       "      <th>winner</th>\n",
       "      <th>next_legal_moves</th>\n",
       "      <th>canonical_symmetry_id</th>\n",
       "      <th>text_instruction_alt</th>\n",
       "      <th>ascii_board_alt</th>\n",
       "      <th>best_moves</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>[[1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[11, 12, 13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 1]</td>\n",
       "      <td>Row 0: +, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>+ . .\\n. . .\\n. . .</td>\n",
       "      <td>[14, 15, 17, 18]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>[1, 2, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>[[1, 11]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>+ Y .\\n. . .\\n. . .</td>\n",
       "      <td>[4, 5, 6, 7, 8, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>[1, 2, 1, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>[[1, 11, 3], [3, 11, 1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>+ Y +\\n. . .\\n. . .</td>\n",
       "      <td>[14, 15, 16, 18]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>[1, 2, 1, 2, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\nO . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...</td>\n",
       "      <td>[[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...</td>\n",
       "      <td>+ Y +\\nY . .\\n. . .</td>\n",
       "      <td>[5, 6, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5473</th>\n",
       "      <td>5473</td>\n",
       "      <td>[0, 0, 0, 2, 0, 0, 0, 0, 1]</td>\n",
       "      <td>. . .\\nO . .\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: O, empty, e...</td>\n",
       "      <td>[[9, 13]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 5, 6, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 1, 0, 0]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: Y, empty, e...</td>\n",
       "      <td>. . .\\nY . .\\n. . +</td>\n",
       "      <td>[3, 5, 6, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5474</th>\n",
       "      <td>5474</td>\n",
       "      <td>[0, 0, 0, 0, 2, 0, 0, 0, 1]</td>\n",
       "      <td>. . .\\n. O .\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, O, e...</td>\n",
       "      <td>[[9, 14]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 6, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 2, 0, 0, 0, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, Y, e...</td>\n",
       "      <td>. . .\\n. Y .\\n. . +</td>\n",
       "      <td>[1, 2, 3, 4, 6, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5475</th>\n",
       "      <td>5475</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 0, 0, 1]</td>\n",
       "      <td>. . .\\n. . O\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 15]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . Y\\n. . +</td>\n",
       "      <td>[5, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5476</th>\n",
       "      <td>5476</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 2, 0, 1]</td>\n",
       "      <td>. . .\\n. . .\\nO . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 16]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 1, 0, 2]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\nY . +</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5477</th>\n",
       "      <td>5477</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>. . .\\n. . .\\n. O X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 17]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\n. Y +</td>\n",
       "      <td>[3, 4, 5, 6, 7]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5478 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      state_id                        board          ascii_board  \\\n",
       "0            0  [0, 0, 0, 0, 0, 0, 0, 0, 0]  . . .\\n. . .\\n. . .   \n",
       "1            1  [1, 0, 0, 0, 0, 0, 0, 0, 0]  X . .\\n. . .\\n. . .   \n",
       "2            2  [1, 2, 0, 0, 0, 0, 0, 0, 0]  X O .\\n. . .\\n. . .   \n",
       "3            3  [1, 2, 1, 0, 0, 0, 0, 0, 0]  X O X\\n. . .\\n. . .   \n",
       "4            4  [1, 2, 1, 2, 0, 0, 0, 0, 0]  X O X\\nO . .\\n. . .   \n",
       "...        ...                          ...                  ...   \n",
       "5473      5473  [0, 0, 0, 2, 0, 0, 0, 0, 1]  . . .\\nO . .\\n. . X   \n",
       "5474      5474  [0, 0, 0, 0, 2, 0, 0, 0, 1]  . . .\\n. O .\\n. . X   \n",
       "5475      5475  [0, 0, 0, 0, 0, 2, 0, 0, 1]  . . .\\n. . O\\n. . X   \n",
       "5476      5476  [0, 0, 0, 0, 0, 0, 2, 0, 1]  . . .\\n. . .\\nO . X   \n",
       "5477      5477  [0, 0, 0, 0, 0, 0, 0, 2, 1]  . . .\\n. . .\\n. O X   \n",
       "\n",
       "                                       text_instruction  \\\n",
       "0     Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "1     Row 0: X, empty, empty. Row 1: empty, empty, e...   \n",
       "2     Row 0: X, O, empty. Row 1: empty, empty, empty...   \n",
       "3     Row 0: X, O, X. Row 1: empty, empty, empty. Ro...   \n",
       "4     Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...   \n",
       "...                                                 ...   \n",
       "5473  Row 0: empty, empty, empty. Row 1: O, empty, e...   \n",
       "5474  Row 0: empty, empty, empty. Row 1: empty, O, e...   \n",
       "5475  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "5476  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "5477  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "\n",
       "                                         move_sequences  is_terminal  winner  \\\n",
       "0                                                  [[]]        False       0   \n",
       "1                                                 [[1]]        False       0   \n",
       "2                                             [[1, 11]]        False       0   \n",
       "3                              [[1, 11, 3], [3, 11, 1]]        False       0   \n",
       "4     [[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...        False       0   \n",
       "...                                                 ...          ...     ...   \n",
       "5473                                          [[9, 13]]        False       0   \n",
       "5474                                          [[9, 14]]        False       0   \n",
       "5475                                          [[9, 15]]        False       0   \n",
       "5476                                          [[9, 16]]        False       0   \n",
       "5477                                          [[9, 17]]        False       0   \n",
       "\n",
       "                      next_legal_moves        canonical_symmetry_id  \\\n",
       "0          [1, 2, 3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 0, 0]   \n",
       "1     [11, 12, 13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 0, 0, 1]   \n",
       "2                [3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "3             [13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 1, 2, 1]   \n",
       "4                      [5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 2, 1, 2, 1]   \n",
       "...                                ...                          ...   \n",
       "5473             [1, 2, 3, 5, 6, 7, 8]  [0, 0, 0, 0, 0, 2, 1, 0, 0]   \n",
       "5474             [1, 2, 3, 4, 6, 7, 8]  [0, 0, 0, 0, 2, 0, 0, 0, 1]   \n",
       "5475             [1, 2, 3, 4, 5, 7, 8]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "5476             [1, 2, 3, 4, 5, 6, 8]  [0, 0, 0, 0, 0, 0, 1, 0, 2]   \n",
       "5477             [1, 2, 3, 4, 5, 6, 7]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "\n",
       "                                   text_instruction_alt      ascii_board_alt  \\\n",
       "0     Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\n. . .   \n",
       "1     Row 0: +, empty, empty. Row 1: empty, empty, e...  + . .\\n. . .\\n. . .   \n",
       "2     Row 0: +, Y, empty. Row 1: empty, empty, empty...  + Y .\\n. . .\\n. . .   \n",
       "3     Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...  + Y +\\n. . .\\n. . .   \n",
       "4     Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...  + Y +\\nY . .\\n. . .   \n",
       "...                                                 ...                  ...   \n",
       "5473  Row 0: empty, empty, empty. Row 1: Y, empty, e...  . . .\\nY . .\\n. . +   \n",
       "5474  Row 0: empty, empty, empty. Row 1: empty, Y, e...  . . .\\n. Y .\\n. . +   \n",
       "5475  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . Y\\n. . +   \n",
       "5476  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\nY . +   \n",
       "5477  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\n. Y +   \n",
       "\n",
       "                       best_moves  \n",
       "0     [1, 2, 3, 4, 5, 6, 7, 8, 9]  \n",
       "1                [14, 15, 17, 18]  \n",
       "2              [4, 5, 6, 7, 8, 9]  \n",
       "3                [14, 15, 16, 18]  \n",
       "4                       [5, 6, 9]  \n",
       "...                           ...  \n",
       "5473              [3, 5, 6, 7, 8]  \n",
       "5474        [1, 2, 3, 4, 6, 7, 8]  \n",
       "5475                    [5, 7, 8]  \n",
       "5476        [1, 2, 3, 4, 5, 6, 8]  \n",
       "5477              [3, 4, 5, 6, 7]  \n",
       "\n",
       "[5478 rows x 12 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_dataset = full_dataset.drop('best_move', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>state_id</th>\n",
       "      <th>board</th>\n",
       "      <th>ascii_board</th>\n",
       "      <th>text_instruction</th>\n",
       "      <th>move_sequences</th>\n",
       "      <th>is_terminal</th>\n",
       "      <th>winner</th>\n",
       "      <th>next_legal_moves</th>\n",
       "      <th>canonical_symmetry_id</th>\n",
       "      <th>text_instruction_alt</th>\n",
       "      <th>ascii_board_alt</th>\n",
       "      <th>best_moves</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\n. . .</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7, 8, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>[1, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X . .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>[[1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[11, 12, 13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 1]</td>\n",
       "      <td>Row 0: +, empty, empty. Row 1: empty, empty, e...</td>\n",
       "      <td>+ . .\\n. . .\\n. . .</td>\n",
       "      <td>[14, 15, 17, 18]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>[1, 2, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O .\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>[[1, 11]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[3, 4, 5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, empty. Row 1: empty, empty, empty...</td>\n",
       "      <td>+ Y .\\n. . .\\n. . .</td>\n",
       "      <td>[4, 5, 6, 7, 8, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>[1, 2, 1, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\n. . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>[[1, 11, 3], [3, 11, 1]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[13, 14, 15, 16, 17, 18]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...</td>\n",
       "      <td>+ Y +\\n. . .\\n. . .</td>\n",
       "      <td>[14, 15, 16, 18]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>[1, 2, 1, 2, 0, 0, 0, 0, 0]</td>\n",
       "      <td>X O X\\nO . .\\n. . .</td>\n",
       "      <td>Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...</td>\n",
       "      <td>[[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[5, 6, 7, 8, 9]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 1, 2, 1]</td>\n",
       "      <td>Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...</td>\n",
       "      <td>+ Y +\\nY . .\\n. . .</td>\n",
       "      <td>[5, 6, 8, 9]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5473</th>\n",
       "      <td>5473</td>\n",
       "      <td>[0, 0, 0, 2, 0, 0, 0, 0, 1]</td>\n",
       "      <td>. . .\\nO . .\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: O, empty, e...</td>\n",
       "      <td>[[9, 13]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 5, 6, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 1, 0, 0]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: Y, empty, e...</td>\n",
       "      <td>. . .\\nY . .\\n. . +</td>\n",
       "      <td>[3, 5, 6, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5474</th>\n",
       "      <td>5474</td>\n",
       "      <td>[0, 0, 0, 0, 2, 0, 0, 0, 1]</td>\n",
       "      <td>. . .\\n. O .\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, O, e...</td>\n",
       "      <td>[[9, 14]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 6, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 2, 0, 0, 0, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, Y, e...</td>\n",
       "      <td>. . .\\n. Y .\\n. . +</td>\n",
       "      <td>[1, 2, 3, 4, 6, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5475</th>\n",
       "      <td>5475</td>\n",
       "      <td>[0, 0, 0, 0, 0, 2, 0, 0, 1]</td>\n",
       "      <td>. . .\\n. . O\\n. . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 15]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 7, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . Y\\n. . +</td>\n",
       "      <td>[5, 7, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5476</th>\n",
       "      <td>5476</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 2, 0, 1]</td>\n",
       "      <td>. . .\\n. . .\\nO . X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 16]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 8]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 1, 0, 2]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\nY . +</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 8]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5477</th>\n",
       "      <td>5477</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>. . .\\n. . .\\n. O X</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>[[9, 17]]</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>[1, 2, 3, 4, 5, 6, 7]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 2, 1]</td>\n",
       "      <td>Row 0: empty, empty, empty. Row 1: empty, empt...</td>\n",
       "      <td>. . .\\n. . .\\n. Y +</td>\n",
       "      <td>[3, 4, 5, 6, 7]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5478 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      state_id                        board          ascii_board  \\\n",
       "0            0  [0, 0, 0, 0, 0, 0, 0, 0, 0]  . . .\\n. . .\\n. . .   \n",
       "1            1  [1, 0, 0, 0, 0, 0, 0, 0, 0]  X . .\\n. . .\\n. . .   \n",
       "2            2  [1, 2, 0, 0, 0, 0, 0, 0, 0]  X O .\\n. . .\\n. . .   \n",
       "3            3  [1, 2, 1, 0, 0, 0, 0, 0, 0]  X O X\\n. . .\\n. . .   \n",
       "4            4  [1, 2, 1, 2, 0, 0, 0, 0, 0]  X O X\\nO . .\\n. . .   \n",
       "...        ...                          ...                  ...   \n",
       "5473      5473  [0, 0, 0, 2, 0, 0, 0, 0, 1]  . . .\\nO . .\\n. . X   \n",
       "5474      5474  [0, 0, 0, 0, 2, 0, 0, 0, 1]  . . .\\n. O .\\n. . X   \n",
       "5475      5475  [0, 0, 0, 0, 0, 2, 0, 0, 1]  . . .\\n. . O\\n. . X   \n",
       "5476      5476  [0, 0, 0, 0, 0, 0, 2, 0, 1]  . . .\\n. . .\\nO . X   \n",
       "5477      5477  [0, 0, 0, 0, 0, 0, 0, 2, 1]  . . .\\n. . .\\n. O X   \n",
       "\n",
       "                                       text_instruction  \\\n",
       "0     Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "1     Row 0: X, empty, empty. Row 1: empty, empty, e...   \n",
       "2     Row 0: X, O, empty. Row 1: empty, empty, empty...   \n",
       "3     Row 0: X, O, X. Row 1: empty, empty, empty. Ro...   \n",
       "4     Row 0: X, O, X. Row 1: O, empty, empty. Row 2:...   \n",
       "...                                                 ...   \n",
       "5473  Row 0: empty, empty, empty. Row 1: O, empty, e...   \n",
       "5474  Row 0: empty, empty, empty. Row 1: empty, O, e...   \n",
       "5475  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "5476  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "5477  Row 0: empty, empty, empty. Row 1: empty, empt...   \n",
       "\n",
       "                                         move_sequences  is_terminal  winner  \\\n",
       "0                                                  [[]]        False       0   \n",
       "1                                                 [[1]]        False       0   \n",
       "2                                             [[1, 11]]        False       0   \n",
       "3                              [[1, 11, 3], [3, 11, 1]]        False       0   \n",
       "4     [[1, 11, 3, 13], [1, 13, 3, 11], [3, 11, 1, 13...        False       0   \n",
       "...                                                 ...          ...     ...   \n",
       "5473                                          [[9, 13]]        False       0   \n",
       "5474                                          [[9, 14]]        False       0   \n",
       "5475                                          [[9, 15]]        False       0   \n",
       "5476                                          [[9, 16]]        False       0   \n",
       "5477                                          [[9, 17]]        False       0   \n",
       "\n",
       "                      next_legal_moves        canonical_symmetry_id  \\\n",
       "0          [1, 2, 3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 0, 0]   \n",
       "1     [11, 12, 13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 0, 0, 1]   \n",
       "2                [3, 4, 5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "3             [13, 14, 15, 16, 17, 18]  [0, 0, 0, 0, 0, 0, 1, 2, 1]   \n",
       "4                      [5, 6, 7, 8, 9]  [0, 0, 0, 0, 0, 2, 1, 2, 1]   \n",
       "...                                ...                          ...   \n",
       "5473             [1, 2, 3, 5, 6, 7, 8]  [0, 0, 0, 0, 0, 2, 1, 0, 0]   \n",
       "5474             [1, 2, 3, 4, 6, 7, 8]  [0, 0, 0, 0, 2, 0, 0, 0, 1]   \n",
       "5475             [1, 2, 3, 4, 5, 7, 8]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "5476             [1, 2, 3, 4, 5, 6, 8]  [0, 0, 0, 0, 0, 0, 1, 0, 2]   \n",
       "5477             [1, 2, 3, 4, 5, 6, 7]  [0, 0, 0, 0, 0, 0, 0, 2, 1]   \n",
       "\n",
       "                                   text_instruction_alt      ascii_board_alt  \\\n",
       "0     Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\n. . .   \n",
       "1     Row 0: +, empty, empty. Row 1: empty, empty, e...  + . .\\n. . .\\n. . .   \n",
       "2     Row 0: +, Y, empty. Row 1: empty, empty, empty...  + Y .\\n. . .\\n. . .   \n",
       "3     Row 0: +, Y, +. Row 1: empty, empty, empty. Ro...  + Y +\\n. . .\\n. . .   \n",
       "4     Row 0: +, Y, +. Row 1: Y, empty, empty. Row 2:...  + Y +\\nY . .\\n. . .   \n",
       "...                                                 ...                  ...   \n",
       "5473  Row 0: empty, empty, empty. Row 1: Y, empty, e...  . . .\\nY . .\\n. . +   \n",
       "5474  Row 0: empty, empty, empty. Row 1: empty, Y, e...  . . .\\n. Y .\\n. . +   \n",
       "5475  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . Y\\n. . +   \n",
       "5476  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\nY . +   \n",
       "5477  Row 0: empty, empty, empty. Row 1: empty, empt...  . . .\\n. . .\\n. Y +   \n",
       "\n",
       "                       best_moves  \n",
       "0     [1, 2, 3, 4, 5, 6, 7, 8, 9]  \n",
       "1                [14, 15, 17, 18]  \n",
       "2              [4, 5, 6, 7, 8, 9]  \n",
       "3                [14, 15, 16, 18]  \n",
       "4                    [5, 6, 8, 9]  \n",
       "...                           ...  \n",
       "5473              [3, 5, 6, 7, 8]  \n",
       "5474        [1, 2, 3, 4, 6, 7, 8]  \n",
       "5475                    [5, 7, 8]  \n",
       "5476        [1, 2, 3, 4, 5, 6, 8]  \n",
       "5477              [3, 4, 5, 6, 7]  \n",
       "\n",
       "[5478 rows x 12 columns]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analyse Illegal Moves\n",
    "\n",
    "Output from generator script:\n",
    "\n",
    "\n",
    "(grpo-stable) data@dtza101bdu:~/stlm-game-logic/scripts/bash$ bash run_generate_illegal_boards.sh\n",
    "Running: python /home/data/stlm-game-logic/scripts/python/generate_illegal_boards.py --start-player x --output /mnt/shared/data/stlm-logic/datasets/illegal_boards_x_DOUBLE_WIN+COUNT_DIFF_GT1_any_5000_20250906_152524.json --sample-size 5000 --reasons DOUBLE_WIN COUNT_DIFF_GT1 --reason-mode any\n",
    "\n",
    "Illegal boards generated: 5000\n",
    "Reason distribution:\n",
    "  COUNT_DIFF_GT1: 4772\n",
    "  WIN_COUNT_MISMATCH: 2401\n",
    "  DOUBLE_WIN: 240\n",
    "\n",
    "Output: /mnt/shared/data/stlm-logic/datasets/illegal_boards_x_DOUBLE_WIN+COUNT_DIFF_GT1_any_5000_20250906_152524.json\n",
    "\n",
    "Meta:   /mnt/shared/data/stlm-logic/datasets/illegal_boards_x_DOUBLE_WIN+COUNT_DIFF_GT1_any_5000_20250906_152524.meta.json\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load illegal boards output and meta file\n",
    "\n",
    "illegal_boards_path = os.path.join(BASE_DATASETS_PATH, \"illegal_boards_x_DOUBLE_WIN+COUNT_DIFF_GT1_any_5000_20250906_152524.json\")\n",
    "illegal_boards_meta_path = os.path.join(BASE_DATASETS_PATH, \"illegal_boards_x_DOUBLE_WIN+COUNT_DIFF_GT1_any_5000_20250906_152524.meta.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load illegal boards into df\n",
    "illegal_boards = pd.read_json(illegal_boards_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>board</th>\n",
       "      <th>ascii_board</th>\n",
       "      <th>count_x</th>\n",
       "      <th>count_o</th>\n",
       "      <th>x_win</th>\n",
       "      <th>o_win</th>\n",
       "      <th>reasons</th>\n",
       "      <th>start_mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[0, 0, 0, 1, 1, 1, 2, 2, 2]</td>\n",
       "      <td>. . .\\nX X X\\nO O O</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>[DOUBLE_WIN, WIN_COUNT_MISMATCH]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[0, 0, 0, 2, 2, 2, 1, 1, 1]</td>\n",
       "      <td>. . .\\nO O O\\nX X X</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>[DOUBLE_WIN, WIN_COUNT_MISMATCH]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[0, 0, 1, 1, 1, 1, 2, 2, 2]</td>\n",
       "      <td>. . X\\nX X X\\nO O O</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>[DOUBLE_WIN]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[0, 0, 1, 2, 2, 2, 1, 1, 1]</td>\n",
       "      <td>. . X\\nO O O\\nX X X</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>[DOUBLE_WIN]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[0, 0, 2, 1, 1, 1, 2, 2, 2]</td>\n",
       "      <td>. . O\\nX X X\\nO O O</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>[DOUBLE_WIN, WIN_COUNT_MISMATCH]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4995</th>\n",
       "      <td>[1, 1, 0, 1, 0, 0, 2, 1, 2]</td>\n",
       "      <td>X X .\\nX . .\\nO X O</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[COUNT_DIFF_GT1]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4996</th>\n",
       "      <td>[1, 1, 0, 1, 0, 0, 2, 2, 1]</td>\n",
       "      <td>X X .\\nX . .\\nO O X</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[COUNT_DIFF_GT1]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4997</th>\n",
       "      <td>[1, 1, 0, 1, 0, 1, 0, 0, 0]</td>\n",
       "      <td>X X .\\nX . X\\n. . .</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[COUNT_DIFF_GT1]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4998</th>\n",
       "      <td>[1, 1, 0, 1, 0, 1, 0, 0, 1]</td>\n",
       "      <td>X X .\\nX . X\\n. . X</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[COUNT_DIFF_GT1]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4999</th>\n",
       "      <td>[1, 1, 0, 1, 0, 1, 0, 0, 2]</td>\n",
       "      <td>X X .\\nX . X\\n. . O</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>[COUNT_DIFF_GT1]</td>\n",
       "      <td>x</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5000 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            board          ascii_board  count_x  count_o  \\\n",
       "0     [0, 0, 0, 1, 1, 1, 2, 2, 2]  . . .\\nX X X\\nO O O        3        3   \n",
       "1     [0, 0, 0, 2, 2, 2, 1, 1, 1]  . . .\\nO O O\\nX X X        3        3   \n",
       "2     [0, 0, 1, 1, 1, 1, 2, 2, 2]  . . X\\nX X X\\nO O O        4        3   \n",
       "3     [0, 0, 1, 2, 2, 2, 1, 1, 1]  . . X\\nO O O\\nX X X        4        3   \n",
       "4     [0, 0, 2, 1, 1, 1, 2, 2, 2]  . . O\\nX X X\\nO O O        3        4   \n",
       "...                           ...                  ...      ...      ...   \n",
       "4995  [1, 1, 0, 1, 0, 0, 2, 1, 2]  X X .\\nX . .\\nO X O        4        2   \n",
       "4996  [1, 1, 0, 1, 0, 0, 2, 2, 1]  X X .\\nX . .\\nO O X        4        2   \n",
       "4997  [1, 1, 0, 1, 0, 1, 0, 0, 0]  X X .\\nX . X\\n. . .        4        0   \n",
       "4998  [1, 1, 0, 1, 0, 1, 0, 0, 1]  X X .\\nX . X\\n. . X        5        0   \n",
       "4999  [1, 1, 0, 1, 0, 1, 0, 0, 2]  X X .\\nX . X\\n. . O        4        1   \n",
       "\n",
       "      x_win  o_win                           reasons start_mode  \n",
       "0      True   True  [DOUBLE_WIN, WIN_COUNT_MISMATCH]          x  \n",
       "1      True   True  [DOUBLE_WIN, WIN_COUNT_MISMATCH]          x  \n",
       "2      True   True                      [DOUBLE_WIN]          x  \n",
       "3      True   True                      [DOUBLE_WIN]          x  \n",
       "4      True   True  [DOUBLE_WIN, WIN_COUNT_MISMATCH]          x  \n",
       "...     ...    ...                               ...        ...  \n",
       "4995  False  False                  [COUNT_DIFF_GT1]          x  \n",
       "4996  False  False                  [COUNT_DIFF_GT1]          x  \n",
       "4997  False  False                  [COUNT_DIFF_GT1]          x  \n",
       "4998  False  False                  [COUNT_DIFF_GT1]          x  \n",
       "4999  False  False                  [COUNT_DIFF_GT1]          x  \n",
       "\n",
       "[5000 rows x 8 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "illegal_boards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      ". . .\n",
      "X X X\n",
      "O O O\n"
     ]
    }
   ],
   "source": [
    "print(illegal_boards['ascii_board'][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['board', 'ascii_board', 'count_x', 'count_o', 'x_win', 'o_win',\n",
       "       'reasons', 'start_mode'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "illegal_boards.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "healtheval",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
