{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [
     "hide_code"
    ]
   },
   "outputs": [],
   "source": [
    "import game\n",
    "import stochastic_game\n",
    "import fictitious_play\n",
    "import numpy as np\n",
    "from mytypes import NumExpr # more or less double/int, technical reasons\n",
    "from IPython.display import Markdown\n",
    "\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": [
     "hide_code"
    ]
   },
   "outputs": [],
   "source": [
    "def show_tbl3(m):\n",
    "    \"\"\"Display a 3-D array as a LaTeX matrix whose entries are pairs.\n",
    "\n",
    "    Entry (i, j) of the rendered matrix reads ``m[i, j, 0], m[i, j, 1]``.\n",
    "    Cells are joined with ``&`` and rows with a LaTeX line break, so no empty\n",
    "    trailing column is emitted. The matrix is shown through IPython's\n",
    "    ``display``; the value returned is whatever ``display`` returns.\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "    for i in range(len(m)):\n",
    "        cells = [f\"{m[i, j, 0]}, {m[i, j, 1]}\" for j in range(len(m[i]))]\n",
    "        rows.append(\" & \".join(cells))\n",
    "    body = \" \\\\\\\\ \".join(rows)\n",
    "    s = \"\\\\begin{matrix}\" + body + \"\\\\end{matrix}\"\n",
    "    return display(Markdown(f\"$\\\\left[{s}\\\\right]$\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For illustration purposes, we demonstrate an implementation of our proposed algorithm on a stochastic game with two states, two actions and two players.\n",
    "\n",
    "There are two states:\n",
    "- in the first one, players play a coordination game with overall higher payoffs\n",
    "- in the second one, they play an anti-coordination game with lower payoffs\n",
    "\n",
    "Transitions are as follows:\n",
    "- in the first state, the transition goes to the first state (up to epsilon) unless both players play 1\n",
    "- in the second state, the transition goes to the second state (up to epsilon) unless the players play (1, 0)\n",
    "\n",
    "The game is an identical-interest game, and players prefer the first state to the second one, so they should play (0, 0) in the first state and (1, 0) in the second one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class StochasticCoordinationGame(stochastic_game.GameWithTransitions[int]):\n",
    "    \"\"\"Two-player stochastic game with two states and two actions per player.\n",
    "\n",
    "    ``self.r[state, a0, a1]`` holds the payoffs returned by ``rewards`` and\n",
    "    ``self.tr[state, a0, a1]`` the sampling weights of next states 0 and 1.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super().__init__(2)\n",
    "        self.max_state = 2\n",
    "        # State 0 is the best state: matching actions pay 10 to both players.\n",
    "        coordination = [[[10, 10], [0, 0]],\n",
    "                        [[0, 0], [10, 10]]]\n",
    "        # State 1: mismatched actions pay 8 to both players.\n",
    "        anti_coordination = [[[0, 0], [8, 8]],\n",
    "                             [[8, 8], [0, 0]]]\n",
    "        self.r = np.array([coordination, anti_coordination])\n",
    "\n",
    "        eps = 1e-2\n",
    "        # likely_next[state][a0][a1] is the next state that gets weight 1 - eps;\n",
    "        # the other state keeps weight eps.\n",
    "        likely_next = [[[0, 0], [0, 1]],\n",
    "                       [[1, 1], [0, 1]]]\n",
    "        self.tr = np.ones((2, 2, 2, 2)) * eps\n",
    "        for state, by_first_action in enumerate(likely_next):\n",
    "            for a0, by_second_action in enumerate(by_first_action):\n",
    "                for a1, target in enumerate(by_second_action):\n",
    "                    self.tr[state, a0, a1, target] = 1 - eps\n",
    "\n",
    "    def all_states(self):\n",
    "        \"\"\"Return the set of state indices ``0 .. max_state - 1``.\"\"\"\n",
    "        return {state for state in range(self.max_state)}\n",
    "\n",
    "    def transition(self, state: int, actions: list[int]) -> dict[int, NumExpr]:\n",
    "        \"\"\"Not provided by this game; always raises ``NotImplementedError``.\"\"\"\n",
    "        raise NotImplementedError\n",
    "\n",
    "    def maxAction(self, state: int, player: int) -> int:\n",
    "        \"\"\"Return 2, whatever the state and the player.\"\"\"\n",
    "        return 2\n",
    "\n",
    "    def rewards(self, state:int, actions:list[int]) -> tuple[int, list[NumExpr]]:\n",
    "        \"\"\"Sample the next state and return it with the payoffs of this step.\n",
    "\n",
    "        The next state is drawn from ``[0, 1]`` with weights\n",
    "        ``self.tr[state, actions[0], actions[1]]``; the payoffs are the entries\n",
    "        of ``self.r[state, actions[0], actions[1]]`` as a list.\n",
    "        \"\"\"\n",
    "        a0, a1 = actions[0], actions[1]\n",
    "        next_state_weights = list(self.tr[state, a0, a1, :])\n",
    "        new_state = random.choices([0, 1], weights=next_state_weights)[0]\n",
    "        return new_state, list(self.r[state, a0, a1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = StochasticCoordinationGame()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Payoff matrix in state 1:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "$\\left[\\begin{matrix}10, 10 &0, 0 &\\\\0, 0 &10, 10 &\\\\\\end{matrix}\\right]$"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_tbl3(g.r[0, :, :])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Payoff matrix in state 2:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "$\\left[\\begin{matrix}0, 0 &8, 8 &\\\\8, 8 &0, 0 &\\\\\\end{matrix}\\right]$"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_tbl3(g.r[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Transition in state 1: (0.99, 0.01) means that there is a 99% chance of going to state 1 and a 1% chance of going to state 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "$\\left[\\begin{matrix}0.99, 0.01 &0.99, 0.01 &\\\\0.99, 0.01 &0.01, 0.99 &\\\\\\end{matrix}\\right]$"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_tbl3(g.tr[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "$\\left[\\begin{matrix}0.01, 0.99 &0.01, 0.99 &\\\\0.99, 0.01 &0.01, 0.99 &\\\\\\end{matrix}\\right]$"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "show_tbl3(g.tr[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Initialize the strategies (see the attached code in `fictitious_play.py`). These are the strategies for which payoff matrices and transitions are not known to the players a priori."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "p1 = fictitious_play.SFPStochasticGames(g, 0, 3)\n",
    "p2 = fictitious_play.SFPStochasticGames(g, 1, 3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Repeat the game for 10000 steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g.repeatPlay(0, [p1, p2], 10000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we can check that the learned action profile is the equilibrium: (0, 0) in the first state and (1, 0) in the second one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.99999665e-01, 3.35177562e-07])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1.mixedAction(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.99999667e-01, 3.32654707e-07])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p2.mixedAction(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.08309080e-06, 9.99995917e-01])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1.mixedAction(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.99997489e-01, 2.51146270e-06])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p2.mixedAction(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.90853536, 6.99959239])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1.u"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([9.90853536, 6.99959239])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p2.u"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.10.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
