{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "# add autoreload for modules\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "from src.zero.agent import ZeroAgent\n",
    "from src.GOPS.prompts import GOPS_RULES_2\n",
    "from src.GOPS.ground_truth_models import ObservedState\n",
    "from src.llm.model import OpenAIModel\n",
    "import inspect\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here's the implementation of the `CustomValueHeuristic` class that inherits from `ValueHeuristic` and evaluates the game state for GOPS (Goofspiel):\n",
      "\n",
      "```python\n",
      "from abc import ABC, abstractmethod\n",
      "from typing import Hashable, Tuple, Dict, Optional, FrozenSet\n",
      "from dataclasses import dataclass, field\n",
      "\n",
      "@dataclass(frozen=True)\n",
      "class HiddenState:\n",
      "    player_0_played_cards: Tuple[int, ...]\n",
      "    player_1_played_cards: Tuple[int, ...]\n",
      "    played_prize_cards: Tuple[int, ...]\n",
      "    prize_deck: FrozenSet[int]\n",
      "    player_0_hand: FrozenSet[int]\n",
      "    player_1_hand: FrozenSet[int]\n",
      "    player_0_cumulative_score: int\n",
      "    player_1_cumulative_score: int\n",
      "    is_environment_turn: bool\n",
      "    contested_points: int\n",
      "    player_0_hidden_play: Optional[int] = field(default=None)\n",
      "    player_1_hidden_play: Optional[int] = field(default=None)\n",
      "\n",
      "class ValueHeuristic(ABC):\n",
      "    '''\n",
      "    Abstract class for a value heuristic\n",
      "    '''\n",
      "    \n",
      "    def evaluate(self, state: Hashable) -> Tuple[Dict[int, float], Dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        return self._evaluate(state)\n",
      "    \n",
      "    @abstractmethod\n",
      "    def _evaluate(self, state: Hashable) -> Tuple[Dict[int, float], Dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        pass\n",
      "\n",
      "class CustomValueHeuristic(ValueHeuristic):\n",
      "    '''\n",
      "    Custom heuristic for evaluating GOPS game states\n",
      "    '''\n",
      "    def _evaluate(self, state: HiddenState) -> Tuple[Dict[int, float], Dict]:\n",
      "        player_0_score = state.player_0_cumulative_score\n",
      "        player_1_score = state.player_1_cumulative_score\n",
      "\n",
      "        remaining_prizes = sum(state.prize_deck)\n",
      "        contested_points = state.contested_points\n",
      "\n",
      "        player_0_potential = sum(state.player_0_hand)\n",
      "        player_1_potential = sum(state.player_1_hand)\n",
      "\n",
      "        player_0_expected_value = player_0_score + (remaining_prizes + contested_points) * (player_0_potential / (player_0_potential + player_1_potential))\n",
      "        player_1_expected_value = player_1_score + (remaining_prizes + contested_points) * (player_1_potential / (player_0_potential + player_1_potential))\n",
      "\n",
      "        values = {\n",
      "            0: player_0_expected_value,\n",
      "            1: player_1_expected_value\n",
      "        }\n",
      "\n",
      "        notes = {\n",
      "            'player_0_score': player_0_score,\n",
      "            'player_1_score': player_1_score,\n",
      "            'remaining_prizes': remaining_prizes,\n",
      "            'contested_points': contested_points,\n",
      "            'player_0_potential': player_0_potential,\n",
      "            'player_1_potential': player_1_potential\n",
      "        }\n",
      "\n",
      "        return values, notes\n",
      "```\n",
      "\n",
      "### Explanation\n",
      "1. **HiddenState Class**:\n",
      "   - The `HiddenState` class encapsulates the current game state, including the cards played, the cards in hand, cumulative scores, and other game-specific details.\n",
      "\n",
      "2. **ValueHeuristic Abstract Class**:\n",
      "   - This abstract class outlines the structure for any value heuristic, requiring the implementation of the `_evaluate` method.\n",
      "\n",
      "3. **CustomValueHeuristic Class**:\n",
      "   - In the `CustomValueHeuristic` class, the `_evaluate` method is implemented to provide an estimation of the current state.\n",
      "   - **Scores**: The current scores for both players are extracted.\n",
      "   - **Remaining Prizes**: The total sum of the remaining prize cards.\n",
      "   - **Contested Points**: Points carried over from tied rounds.\n",
      "   - **Potential**: The sum of the cards remaining in each player’s hand, representing their potential to win future rounds.\n",
      "   - **Expected Values**: The expected future rewards are calculated by considering the current scores and the proportion of potential cards each player has.\n",
      "   - **Values Dictionary**: Contains the expected values for each player.\n",
      "   - **Notes Dictionary**: Provides additional context, including potential and current scores.\n",
      "\n",
      "This heuristic provides a simple yet effective way to estimate the value of a game state based on current scores and potential future rewards.\n",
      "name 'ValueHeuristic' is not defined\n",
      "Here's how you can implement a `CustomValueHeuristic` class for the GOPS game using the `ValueHeuristic` abstract class. The heuristic will be based on the current scores, the remaining cards in the players' hands, and the remaining prize cards in the prize deck.\n",
      "\n",
      "```python\n",
      "from abc import ABC, abstractmethod\n",
      "from typing import Hashable\n",
      "from dataclasses import dataclass, field\n",
      "from typing import Optional, Tuple, FrozenSet\n",
      "\n",
      "class ValueHeuristic(ABC):\n",
      "    '''\n",
      "    Abstract class for a value heuristic\n",
      "    '''\n",
      "    \n",
      "    def evaluate(self, state: Hashable) -> tuple[dict[int, float], dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        return self._evaluate(state)\n",
      "    \n",
      "    @abstractmethod\n",
      "    def _evaluate(self, state: Hashable) -> tuple[dict[int, float], dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        pass\n",
      "\n",
      "@dataclass\n",
      "class HiddenState:\n",
      "    '''\n",
      "    We assume cards in deck and hand are list(range(1, num_cards+1))\n",
      "    '''\n",
      "    player_0_played_cards: Tuple[int, ...] # cards played by player 0\n",
      "    player_1_played_cards: Tuple[int, ...] # cards played by player 1\n",
      "    played_prize_cards: Tuple[int, ...] # cards played as prizes\n",
      "    prize_deck: FrozenSet[int] # cards that are still in the prize deck\n",
      "    player_0_hand: FrozenSet[int] # cards in player 0's hand\n",
      "    player_1_hand: FrozenSet[int] # cards in player 1's hand\n",
      "    player_0_cumulative_score: int # player 0's cumulative score\n",
      "    player_1_cumulative_score: int # player 1's cumulative score\n",
      "    is_environment_turn: bool # whether it is the environment's turn to draw a prize card or the players' turn to each play a card\n",
      "    contested_points: int # points that are contested from previous rounds\n",
      "    player_0_hidden_play: Optional[int] = field(default=None) # unrevealed card played by player 0\n",
      "    player_1_hidden_play: Optional[int] = field(default=None) # unrevealed card played by player 1\n",
      "\n",
      "class CustomValueHeuristic(ValueHeuristic):\n",
      "    '''\n",
      "    Custom heuristic for evaluating GOPS game states.\n",
      "    '''\n",
      "\n",
      "    def _evaluate(self, state: HiddenState) -> tuple[dict[int, float], dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: HiddenState to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        # Extract relevant information from the state\n",
      "        player_0_score = state.player_0_cumulative_score\n",
      "        player_1_score = state.player_1_cumulative_score\n",
      "        remaining_prizes = sum(state.prize_deck)\n",
      "        contested_points = state.contested_points\n",
      "        \n",
      "        # Value estimation heuristics\n",
      "        player_0_value = player_0_score + (remaining_prizes / 2) + contested_points\n",
      "        player_1_value = player_1_score + (remaining_prizes / 2) + contested_points\n",
      "\n",
      "        values = {\n",
      "            0: player_0_value,\n",
      "            1: player_1_value\n",
      "        }\n",
      "        \n",
      "        notes = {\n",
      "            'player_0_score': player_0_score,\n",
      "            'player_1_score': player_1_score,\n",
      "            'remaining_prizes': remaining_prizes,\n",
      "            'contested_points': contested_points,\n",
      "            'estimate_player_0': player_0_value,\n",
      "            'estimate_player_1': player_1_value\n",
      "        }\n",
      "\n",
      "        return values, notes\n",
      "```\n",
      "\n",
      "### Explanation of the Heuristic:\n",
      "- **Current Scores:** The heuristic takes into account the current cumulative scores of both players.\n",
      "- **Remaining Prizes:** It assumes that the remaining prizes will be evenly split between the two players.\n",
      "- **Contested Points:** Any points that are contested from previous rounds are added to both players' estimates.\n",
      "\n",
      "This is a simple yet effective heuristic that estimates the value of a game state based on the current and potential future points. More sophisticated heuristics could take into account additional factors such as the distribution of remaining cards in the hands of the players.\n",
      "name 'ValueHeuristic' is not defined\n",
      "Certainly! Let's implement a custom value heuristic for the GOPS game. This heuristic will estimate the value of a game state based on the current scores, the cards left in the players' hands, and the prize deck.\n",
      "\n",
      "Here is the implementation of the `CustomValueHeuristic` class:\n",
      "\n",
      "```python\n",
      "from abc import ABC, abstractmethod\n",
      "from typing import Hashable, FrozenSet, Optional, Tuple, Dict\n",
      "from dataclasses import dataclass, field\n",
      "\n",
      "class ValueHeuristic(ABC):\n",
      "    '''\n",
      "    Abstract class for a value heuristic\n",
      "    '''\n",
      "    \n",
      "    def evaluate(self, state: Hashable) -> Tuple[Dict[int, float], Dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        return self._evaluate(state)\n",
      "    \n",
      "    @abstractmethod\n",
      "    def _evaluate(self, state: Hashable) -> Tuple[Dict[int, float], Dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        pass\n",
      "\n",
      "@dataclass\n",
      "class HiddenState:\n",
      "    '''\n",
      "    We assume cards in deck and hand are list(range(1, num_cards+1))\n",
      "    '''\n",
      "    player_0_played_cards: Tuple[int, ...]\n",
      "    player_1_played_cards: Tuple[int, ...]\n",
      "    played_prize_cards: Tuple[int, ...]\n",
      "    prize_deck: FrozenSet[int]\n",
      "    player_0_hand: FrozenSet[int]\n",
      "    player_1_hand: FrozenSet[int]\n",
      "    player_0_cumulative_score: int\n",
      "    player_1_cumulative_score: int\n",
      "    is_environment_turn: bool\n",
      "    contested_points: int\n",
      "    player_0_hidden_play: Optional[int] = field(default=None)\n",
      "    player_1_hidden_play: Optional[int] = field(default=None)\n",
      "\n",
      "class CustomValueHeuristic(ValueHeuristic):\n",
      "    def _evaluate(self, state: HiddenState) -> Tuple[Dict[int, float], Dict]:\n",
      "        '''\n",
      "        Evaluates the state\n",
      "\n",
      "        Args:\n",
      "            state: state to evaluate\n",
      "\n",
      "        Returns:\n",
      "            values: estimated values of the state for each player (i.e. the expected sum of future rewards from the state)\n",
      "            notes: notes about the state\n",
      "        '''\n",
      "        player_0_value = self._heuristic(state, player=0)\n",
      "        player_1_value = self._heuristic(state, player=1)\n",
      "        \n",
      "        values = {0: player_0_value, 1: player_1_value}\n",
      "        notes = {\n",
      "            'player_0_cumulative_score': state.player_0_cumulative_score,\n",
      "            'player_1_cumulative_score': state.player_1_cumulative_score,\n",
      "            'player_0_hand': state.player_0_hand,\n",
      "            'player_1_hand': state.player_1_hand,\n",
      "            'prize_deck': state.prize_deck,\n",
      "            'contested_points': state.contested_points\n",
      "        }\n",
      "        \n",
      "        return values, notes\n",
      "    \n",
      "    def _heuristic(self, state: HiddenState, player: int) -> float:\n",
      "        '''\n",
      "        A simple heuristic to estimate the value of the current state for a given player.\n",
      "\n",
      "        Args:\n",
      "            state: the current game state\n",
      "            player: the player for whom the value is being estimated (0 or 1)\n",
      "\n",
      "        Returns:\n",
      "            The estimated value of the state for the given player.\n",
      "        '''\n",
      "        opponent = 1 - player\n",
      "        player_cumulative_score = state.player_0_cumulative_score if player == 0 else state.player_1_cumulative_score\n",
      "        opponent_cumulative_score = state.player_1_cumulative_score if player == 0 else state.player_0_cumulative_score\n",
      "        player_hand = state.player_0_hand if player == 0 else state.player_1_hand\n",
      "        opponent_hand = state.player_1_hand if player == 0 else state.player_0_hand\n",
      "\n",
      "        # Heuristic: Current score + average of remaining prize deck points * (cards in hand / total remaining cards)\n",
      "        remaining_prize_points = sum(state.prize_deck)\n",
      "        remaining_cards = len(state.prize_deck)\n",
      "        if remaining_cards > 0:\n",
      "            average_prize_points = remaining_prize_points / remaining_cards\n",
      "        else:\n",
      "            average_prize_points = 0\n",
      "\n",
      "        player_value = player_cumulative_score + average_prize_points * (len(player_hand) / (len(player_hand) + len(opponent_hand)))\n",
      "        opponent_value = opponent_cumulative_score + average_prize_points * (len(opponent_hand) / (len(player_hand) + len(opponent_hand)))\n",
      "\n",
      "        # The value is the player's score minus the opponent's score, giving a relative advantage\n",
      "        return player_value - opponent_value\n",
      "```\n",
      "\n",
      "In this implementation, the `CustomValueHeuristic` class estimates the value of the state for each player based on their cumulative scores, the cards left in their hands, and the average value of the remaining prize cards. The heuristic aims to provide a relative advantage by considering the difference between the player's estimated value and the opponent's estimated value. This is a simple heuristic and can be further refined based on deeper insights into the game dynamics.\n",
      "name 'ValueHeuristic' is not defined\n"
     ]
    },
    {
     "ename": "Exception",
     "evalue": "Could not construct value heuristic",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mException\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mZeroAgent\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mGOPS_RULES_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minspect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetsource\u001b[49m\u001b[43m(\u001b[49m\u001b[43mObservedState\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mOpenAIModel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgpt-4o\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/github-repos/SearchTechniques/src/zero/agent.py:37\u001b[0m, in \u001b[0;36mZeroAgent.__init__\u001b[0;34m(self, players, player, is_partial, game_description, observed_state_description, llm_model, rng, *args, **kwargs)\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[38;5;28mglobals\u001b[39m()[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHiddenState\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhidden_state_class\n\u001b[1;32m     36\u001b[0m \u001b[38;5;66;03m# then construct the value heuristic\u001b[39;00m\n\u001b[0;32m---> 37\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue_heuristic \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconstruct_value_heuristic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgame_description\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhidden_state_code_string\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minspect\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetsource\u001b[49m\u001b[43m(\u001b[49m\u001b[43mValueHeuristic\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m()\n\u001b[1;32m     38\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m===Value heuristic constructed!===\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     40\u001b[0m \u001b[38;5;66;03m# then construct the actor action enumerator\u001b[39;00m\n",
      "File \u001b[0;32m~/github-repos/SearchTechniques/src/zero/agent.py:224\u001b[0m, in \u001b[0;36mZeroAgent.construct_value_heuristic\u001b[0;34m(self, game_description, hidden_state_description, value_heuristic_class_description)\u001b[0m\n\u001b[1;32m    221\u001b[0m         \u001b[38;5;28mprint\u001b[39m(e)\n\u001b[1;32m    222\u001b[0m         retry \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m--> 224\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not construct value heuristic\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "\u001b[0;31mException\u001b[0m: Could not construct value heuristic"
     ]
    }
   ],
   "source": [
    "# Same call with every argument passed by keyword; names follow ZeroAgent.__init__.\n",
    "ZeroAgent(\n",
    "    players={0, 1},\n",
    "    player=0,\n",
    "    is_partial=True,\n",
    "    game_description=GOPS_RULES_2,\n",
    "    observed_state_description=inspect.getsource(ObservedState),\n",
    "    llm_model=OpenAIModel(\"gpt-4o\"),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/home/dsi/jli/github-repos/SearchTechniques\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "# Print the kernel's current working directory.\n",
    "print(os.getcwd())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "# The printed form of a defaultdict, `defaultdict(<function ...>, {...})`, is not valid\n",
    "# Python: the factory placeholder has to be replaced by a real callable before the\n",
    "# expression can be evaluated.\n",
    "nested_counts = defaultdict(\n",
    "    lambda: defaultdict(int),\n",
    "    {'a': defaultdict(int, {'b': 1}), 'x': defaultdict(int, {'y': 0})},\n",
    ")\n",
    "nested_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "# Two-level counter: a missing outer key yields a fresh defaultdict(int),\n",
    "# whose own missing keys read as 0.\n",
    "dd = defaultdict(lambda: defaultdict(int))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Not idempotent: every re-run of this cell increments the stored count again.\n",
    "dd['a']['b'] += 1\n",
    "dd['a']['b']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "\n",
    "# `<function __main__.<lambda>()>` is only how the default factory is printed; an actual\n",
    "# lambda is needed for the expression to parse.\n",
    "restored_counts = defaultdict(\n",
    "    lambda: defaultdict(int),\n",
    "    {'outer_key': defaultdict(int, {'inner_key': 1}),\n",
    "     'another_outer': defaultdict(int, {'another_inner': 0})},\n",
    ")\n",
    "restored_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_state(state):\n",
    "    '''\n",
    "    Estimates both players' scores from the cards the other side cannot beat.\n",
    "\n",
    "    Args:\n",
    "        state: indexable with at least six items. Index 1 is the player's hand,\n",
    "            index 2 the opponent's hand, index 4 the player's score and index 5\n",
    "            the opponent's score. Indices 0, 3 and 6 (score cards, turn flag,\n",
    "            deck) are not used by this heuristic.\n",
    "\n",
    "    Returns:\n",
    "        (my_score, opponent_score), each raised by the number of cards in that\n",
    "        hand that are strictly higher than every card in the other hand.\n",
    "    '''\n",
    "    my_cards = state[1]\n",
    "    opponent_cards = state[2]\n",
    "    my_score = state[4]\n",
    "    opponent_score = state[5]\n",
    "\n",
    "    # An empty hand has no maximum. Using -inf keeps max() from raising ValueError;\n",
    "    # every card in the other hand then counts as unbeatable.\n",
    "    my_max_card = max(my_cards, default=float('-inf'))\n",
    "    opponent_max_card = max(opponent_cards, default=float('-inf'))\n",
    "\n",
    "    my_chances = sum(1 for card in my_cards if card > opponent_max_card)\n",
    "    opponent_chances = sum(1 for card in opponent_cards if card > my_max_card)\n",
    "\n",
    "    return (my_score + my_chances, opponent_score + opponent_chances)\n",
    "\n",
    "# Improvement:\n",
    "# I improved the original function by precomputing the maximum card value in the player's hand and the opponent's hand once, instead of computing it multiple times in the loops.\n",
    "# This reduces the computational cost and makes the code more efficient."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{2}"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Set difference: the elements of the left set that are absent from the right one.\n",
    "{1, 2} - {1}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_state(player_score, opponent_score, remaining_score_cards, player_rounds_won, opponent_rounds_won, player_hand, opponent_hand, player_played_cards, opponent_played_cards, high_value_threshold=3):\n",
    "    '''\n",
    "    Projects both players' final scores from the score cards still to be awarded.\n",
    "\n",
    "    NOTE: this definition shadows the single-argument `evaluate_state(state)` from an\n",
    "    earlier cell once both cells have been run.\n",
    "\n",
    "    The remaining score cards are visited from highest to lowest. A score card is\n",
    "    credited to the player when the same value is in the player's hand and the player\n",
    "    still has an unspent high-value card; otherwise the opponent is checked under the\n",
    "    same rule. Each credit spends one high-value card.\n",
    "\n",
    "    Args:\n",
    "        player_score: the player's current score.\n",
    "        opponent_score: the opponent's current score.\n",
    "        remaining_score_cards: iterable of score card values not yet awarded.\n",
    "        player_rounds_won: accepted for interface compatibility; not used.\n",
    "        opponent_rounds_won: accepted for interface compatibility; not used.\n",
    "        player_hand: cards still held by the player.\n",
    "        opponent_hand: cards still held by the opponent.\n",
    "        player_played_cards: accepted for interface compatibility; not used.\n",
    "        opponent_played_cards: accepted for interface compatibility; not used.\n",
    "        high_value_threshold: a card counts as high-value when it is strictly greater\n",
    "            than this. The default, 3, is the value that used to be hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        (player_potential_score, opponent_potential_score, player_advantage), where the\n",
    "        advantage is the player's potential score minus the opponent's.\n",
    "    '''\n",
    "    player_potential_score = player_score\n",
    "    opponent_potential_score = opponent_score\n",
    "\n",
    "    def count_high_value_cards(hand):\n",
    "        # Number of cards in the hand that are above the threshold.\n",
    "        return sum(1 for card in hand if card > high_value_threshold)\n",
    "\n",
    "    player_high_value_cards = count_high_value_cards(player_hand)\n",
    "    opponent_high_value_cards = count_high_value_cards(opponent_hand)\n",
    "\n",
    "    # Iterating an empty collection is a no-op, so no separate emptiness check is needed.\n",
    "    for card in sorted(remaining_score_cards, reverse=True):\n",
    "        if card in player_hand and player_high_value_cards > 0:\n",
    "            player_potential_score += card\n",
    "            player_high_value_cards -= 1\n",
    "        elif card in opponent_hand and opponent_high_value_cards > 0:\n",
    "            opponent_potential_score += card\n",
    "            opponent_high_value_cards -= 1\n",
    "\n",
    "    player_advantage = player_potential_score - opponent_potential_score\n",
    "\n",
    "    return (player_potential_score, opponent_potential_score, player_advantage)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.insert(0, '../')\n",
    "from src.Avalon.baseline_models_Avalon import AvalonLLMFunctionalValueHeuristic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source text of an `evaluate_state` heuristic, stored as a string rather than defined here.\n",
    "# A later cell passes it to AvalonLLMFunctionalValueHeuristic.test_evaluate_static.\n",
    "# NOTE(review): the annotation uses `Any`, which the string itself never imports -\n",
    "# presumably whatever executes the string supplies it; confirm.\n",
    "func_str = \"\"\"def evaluate_state(state: dict) -> tuple[dict[Any, float], dict]:\n",
    "    players = state['players']\n",
    "    turn = state['turn']\n",
    "    phase = state['phase']\n",
    "    round = state['round']\n",
    "    quest_leader = state['quest_leader']\n",
    "    quest_team = state['quest_team']\n",
    "    historical_quest_results = state['historical_quest_results']\n",
    "    historical_team_votes = state['historical_team_votes']\n",
    "    num_good = state['num_good']\n",
    "    num_participants_per_quest = state['num_participants_per_quest']\n",
    "    num_fails_per_quest = state['num_fails_per_quest']\n",
    "\n",
    "    # Initialize player win rates dictionary\n",
    "    player_rates = {player: 0.5 for player in players}\n",
    "\n",
    "    # Assign different initial win rates based on player roles\n",
    "    for player in players:\n",
    "        if player in quest_team:\n",
    "            player_rates[player] = 0.6\n",
    "        else:\n",
    "            player_rates[player] = 0.4\n",
    "\n",
    "    # Modify win rate calculations based on role-specific objectives\n",
    "    if 'Merlin' in players:\n",
    "        for player in players:\n",
    "            if player == 'Merlin':\n",
    "                player_rates[player] += 0.1 * (3 - historical_quest_results.count(True))\n",
    "            elif player == 'Assassin':\n",
    "                if phase == 3:\n",
    "                    if 'Merlin' not in quest_team:\n",
    "                        player_rates[player] += 0.7\n",
    "                    else:\n",
    "                        player_rates[player] += 0.3\n",
    "            else:\n",
    "                if 'Merlin' not in quest_team:\n",
    "                    player_rates[player] += 0.1\n",
    "\n",
    "    # Calculate extra score for non-rejected rounds\n",
    "    for qb in historical_team_votes:\n",
    "        if qb:\n",
    "            for player in quest_team:\n",
    "                player_rates[player] += 0.1\n",
    "        else:\n",
    "            for player in set(players) - quest_team:\n",
    "                player_rates[player] += 0.1\n",
    "\n",
    "    intermediate_values = {'turn': turn, 'round': round}\n",
    "\n",
    "    return player_rates, intermediate_values\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pass the heuristic source string to the project's static evaluation test.\n",
    "AvalonLLMFunctionalValueHeuristic.test_evaluate_static(func_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import ast\n",
    "\n",
    "def extract_and_parse_dictionary(update: str) -> dict[int, tuple[int, str]]:\n",
    "    '''\n",
    "    Finds the first belief-update dictionary literal in a block of text and parses it.\n",
    "\n",
    "    The literal must look like {0: (-2, 'some words'), 1: (1, 'other words')}: integer\n",
    "    keys, each mapped to a tuple of a single digit 0-2 (optionally negative) and a\n",
    "    single-quoted description made only of letters and whitespace.\n",
    "\n",
    "    Args:\n",
    "        update: A string that includes descriptive text and a dictionary literal.\n",
    "\n",
    "    Returns:\n",
    "        A dictionary mapping each player index to a tuple containing an integer and a string\n",
    "        that describes the change in belief about the likelihood of the player being Evil.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if no matching literal is found, or the matched text cannot be parsed\n",
    "            into a dict of 2-tuples. The underlying error is attached as __cause__.\n",
    "    '''\n",
    "    regex_pattern = r\"\\{\\s*\\d+\\s*:\\s*\\(-?[0-2],\\s*'[a-zA-Z\\s]+?'\\)\\s*(,\\s*\\d+\\s*:\\s*\\(-?[0-2],\\s*'[a-zA-Z\\s]+?'\\)\\s*)*\\}\"\n",
    "    try:\n",
    "        match = re.search(regex_pattern, update)\n",
    "        if not match:\n",
    "            raise ValueError(\"No dictionary-like string found in the text.\")\n",
    "\n",
    "        # literal_eval only builds Python literals, so the matched text is never executed as code.\n",
    "        result = ast.literal_eval(match.group(0))\n",
    "\n",
    "        # Ensure that the result is a dictionary and the values are tuples as expected\n",
    "        if not (isinstance(result, dict) and all(isinstance(v, tuple) and len(v) == 2 for v in result.values())):\n",
    "            raise ValueError(\"The extracted data does not match the expected format (dict of int to tuples).\")\n",
    "        return result\n",
    "    except (SyntaxError, ValueError) as e:\n",
    "        # Chain explicitly so the root error is reported as the direct cause of this one.\n",
    "        raise ValueError(\"Error parsing the dictionary string: \" + str(e)) from e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: (-2, 'decreased significantly'),\n",
       " 1: (-1, 'decreased slightly'),\n",
       " 2: (1, 'increased significantly'),\n",
       " 3: (0, 'stayed the same'),\n",
       " 4: (-1, 'decreased slightly')}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The same dictionary literal is embedded in three different surroundings. Every sample is\n",
    "# parsed and checked, so none is silently overwritten or left commented out.\n",
    "dict_literal = \"{0: (-2, 'decreased significantly'), 1: (-1, 'decreased slightly'), 2: (1, 'increased significantly'), 3: (0, 'stayed the same'), 4: (-1, 'decreased slightly')}\"\n",
    "explanation = \"The probability of Player 2 being Merlin increased significantly because Merlin is making a strong case for the team that includes Player 2, which could make other players suspect that Player 2 is Merlin.\"\n",
    "expected_update = {\n",
    "    0: (-2, 'decreased significantly'),\n",
    "    1: (-1, 'decreased slightly'),\n",
    "    2: (1, 'increased significantly'),\n",
    "    3: (0, 'stayed the same'),\n",
    "    4: (-1, 'decreased slightly'),\n",
    "}\n",
    "\n",
    "test_strs = [\n",
    "    dict_literal + \" \" + explanation,  # literal followed by prose on the same line\n",
    "    \" \" + dict_literal + \" \\n\\n\" + explanation,  # literal, blank line, then prose\n",
    "    \"awdajwdowajdoawj\" + dict_literal + \"awdaowdjawd\",  # literal wrapped in noise\n",
    "]\n",
    "for test_str in test_strs:\n",
    "    assert extract_and_parse_dictionary(test_str) == expected_update\n",
    "\n",
    "# Display the parse of the last (noise-wrapped) sample.\n",
    "extract_and_parse_dictionary(test_str)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ml",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
