
<!doctype html>
<html lang="en" class="no-js">
  <head>
    
      <meta charset="utf-8">
      <meta name="viewport" content="width=device-width,initial-scale=1">
      
      
      
        <link rel="canonical" href="XXXX">
      
      
        <link rel="prev" href="../coin_game/">
      
      
        <link rel="next" href="../jaxnav/">
      
      
      <link rel="icon" href="../../assets/images/favicon.png">
      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.46">
    
    
      
        <title>Hanabi - JaxMARL Documentation</title>
      
    
    
      <link rel="stylesheet" href="../../assets/stylesheets/main.6f8fc17f.min.css">
      
        
        <link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
      
      


    
    
      
    
    
      
        
        
        <link rel="preconnect" href="XXXX" crossorigin>
        <link rel="stylesheet" href="XXXX">
        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
      
    
    
      <link rel="stylesheet" href="../../assets/_mkdocstrings.css">
    
    <!--
      Inline helpers defined as globals for later scripts on this page:
      * __md_scope: a URL built from "../.." resolved against the current location.
      * __md_hash(str): folds the character codes of str into an integer using (h << 5) - h + code, starting at 0.
      * __md_get(key, storage, scope): JSON-parses the item stored under scope.pathname + "." + key;
        storage defaults to localStorage and scope to __md_scope.
      * __md_set(key, value, storage, scope): stores the JSON-stringified value under the same key format;
        any exception thrown by setItem is caught and ignored.
    -->
    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
    
      

    
    
    
  </head>
  
  
    
    
      
    
    
    
    
    <body dir="ltr" data-md-color-scheme="slate" data-md-color-primary="green" data-md-color-accent="deep-purple">
  
    
    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
    <label class="md-overlay" for="__drawer"></label>
    <div data-md-component="skip">
      
        
        <a href="#hanabi" class="md-skip">
          Skip to content
        </a>
      
    </div>
    <div data-md-component="announce">
      
    </div>
    
    
      

  

<header class="md-header md-header--shadow" data-md-component="header">
  <nav class="md-header__inner md-grid" aria-label="Header">
    <a href="../.." title="JaxMARL Documentation" class="md-header__button md-logo" aria-label="JaxMARL Documentation" data-md-component="logo">
      
  
  <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>

    </a>
    <label class="md-header__button md-icon" for="__drawer">
      
      <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
    </label>
    <div class="md-header__title" data-md-component="header-title">
      <div class="md-header__ellipsis">
        <div class="md-header__topic">
          <span class="md-ellipsis">
            JaxMARL Documentation
          </span>
        </div>
        <div class="md-header__topic" data-md-component="header-topic">
          <span class="md-ellipsis">
            
              Hanabi
            
          </span>
        </div>
      </div>
    </div>
    
      
        <form class="md-header__option" data-md-component="palette">
  
    
    
    
    <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="green" data-md-color-accent="deep-purple"  aria-label="Dark mode"  type="radio" name="__palette" id="__palette_0">
    
      <label class="md-header__button md-icon" title="Dark mode" for="__palette_1" hidden>
        <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M12 7a5 5 0 0 1 5 5 5 5 0 0 1-5 5 5 5 0 0 1-5-5 5 5 0 0 1 5-5m0 2a3 3 0 0 0-3 3 3 3 0 0 0 3 3 3 3 0 0 0 3-3 3 3 0 0 0-3-3m0-7 2.39 3.42C13.65 5.15 12.84 5 12 5s-1.65.15-2.39.42zM3.34 7l4.16-.35A7.2 7.2 0 0 0 5.94 8.5c-.44.74-.69 1.5-.83 2.29zm.02 10 1.76-3.77a7.131 7.131 0 0 0 2.38 4.14zM20.65 7l-1.77 3.79a7.02 7.02 0 0 0-2.38-4.15zm-.01 10-4.14.36c.59-.51 1.12-1.14 1.54-1.86.42-.73.69-1.5.83-2.29zM12 22l-2.41-3.44c.74.27 1.55.44 2.41.44.82 0 1.63-.17 2.37-.44z"/></svg>
      </label>
    
  
    
    
    
    <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="blue" data-md-color-accent="deep-orange"  aria-label="Light mode"  type="radio" name="__palette" id="__palette_1">
    
      <label class="md-header__button md-icon" title="Light mode" for="__palette_0" hidden>
        <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="m17.75 4.09-2.53 1.94.91 3.06-2.63-1.81-2.63 1.81.91-3.06-2.53-1.94L12.44 4l1.06-3 1.06 3zm3.5 6.91-1.64 1.25.59 1.98-1.7-1.17-1.7 1.17.59-1.98L15.75 11l2.06-.05L18.5 9l.69 1.95zm-2.28 4.95c.83-.08 1.72 1.1 1.19 1.85-.32.45-.66.87-1.08 1.27C15.17 23 8.84 23 4.94 19.07c-3.91-3.9-3.91-10.24 0-14.14.4-.4.82-.76 1.27-1.08.75-.53 1.93.36 1.85 1.19-.27 2.86.69 5.83 2.89 8.02a9.96 9.96 0 0 0 8.02 2.89m-1.64 2.02a12.08 12.08 0 0 1-7.8-3.47c-2.17-2.19-3.33-5-3.49-7.82-2.81 3.14-2.7 7.96.31 10.98 3.02 3.01 7.84 3.12 10.98.31"/></svg>
      </label>
    
  
</form>
      
    
    
      <!--
        Applies the stored colour palette to the body element.
        Reads the "__palette" entry through __md_get; if it has a color object, every key/value pair in it
        is written to document.body as a data-md-color-<key> attribute.
        When the stored media value is "(prefers-color-scheme)", the palette input whose data-md-color-media
        matches the light or dark media query (chosen via matchMedia) supplies the media, scheme, primary
        and accent values first.
      -->
      <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
    
    
    
      <label class="md-header__button md-icon" for="__search">
        
        <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
      </label>
      <div class="md-search" data-md-component="search" role="dialog">
  <label class="md-search__overlay" for="__search"></label>
  <div class="md-search__inner" role="search">
    <form class="md-search__form" name="search">
      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
      <label class="md-search__icon md-icon" for="__search">
        
        <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
        
        <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
      </label>
      <nav class="md-search__options" aria-label="Search">
        
        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
          
          <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
        </button>
      </nav>
      
    </form>
    <div class="md-search__output">
      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
        <div class="md-search-result" data-md-component="search-result">
          <div class="md-search-result__meta">
            Initializing search
          </div>
          <ol class="md-search-result__list" role="presentation"></ol>
        </div>
      </div>
    </div>
  </div>
</div>
    
    
      <div class="md-header__source">
        <a href="XXXX" title="Go to repository" class="md-source" data-md-component="source">
  <div class="md-source__icon md-icon">
    
    <svg xmlns="XXXX" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.1 by @fontawesome - XXXX License - XXXX (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
  </div>
  <div class="md-source__repository">
    GitHub
  </div>
</a>
      </div>
    
  </nav>
  
</header>
    
    <div class="md-container" data-md-component="container">
      
      
        
          
        
      
      <main class="md-main" data-md-component="main">
        <div class="md-main__inner md-grid">
          
            
              
              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
                <div class="md-sidebar__scrollwrap">
                  <div class="md-sidebar__inner">
                    



<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
  <label class="md-nav__title" for="__drawer">
    <a href="../.." title="JaxMARL Documentation" class="md-nav__button md-logo" aria-label="JaxMARL Documentation" data-md-component="logo">
      
  
  <svg xmlns="XXXX" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>

    </a>
    JaxMARL Documentation
  </label>
  
    <div class="md-nav__source">
      <a href="XXXX" title="Go to repository" class="md-source" data-md-component="source">
  <div class="md-source__icon md-icon">
    
    <svg xmlns="XXXX" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.1 by @fontawesome - XXXX License - XXXX (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
  </div>
  <div class="md-source__repository">
    GitHub
  </div>
</a>
    </div>
  
  <ul class="md-nav__list" data-md-scrollfix>
    
      
      
  
  
  
  
    <li class="md-nav__item">
      <a href="../.." class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Welcome to JaxMARL!
  </span>
  

      </a>
    </li>
  

    
      
      
  
  
  
  
    
    
    
      
        
        
      
    
    
    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
      
        
        
        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
        
          
          <label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="">
            
  
  <span class="md-ellipsis">
    API
  </span>
  

            <span class="md-nav__icon md-icon"></span>
          </label>
        
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_2">
            <span class="md-nav__icon md-icon"></span>
            API
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../../API/multi_agent_env/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Multi agent env
  </span>
  

      </a>
    </li>
  

              
            
          </ul>
        </nav>
      
    </li>
  

    
      
      
  
  
  
  
    
    
    
      
        
        
      
    
    
    <li class="md-nav__item md-nav__item--section md-nav__item--nested">
      
        
        
        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
        
          
          <label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="">
            
  
  <span class="md-ellipsis">
    Algorithms
  </span>
  

            <span class="md-nav__icon md-icon"></span>
          </label>
        
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_3">
            <span class="md-nav__icon md-icon"></span>
            Algorithms
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../../Algorithms/PPO/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    IPPO &amp; MAPPO
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../../Algorithms/QLearning/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    QLearning
  </span>
  

      </a>
    </li>
  

              
            
          </ul>
        </nav>
      
    </li>
  

    
      
      
  
  
    
  
  
  
    
    
    
      
        
        
      
    
    
    <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
      
        
        
        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked>
        
          
          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="">
            
  
  <span class="md-ellipsis">
    Environments
  </span>
  

            <span class="md-nav__icon md-icon"></span>
          </label>
        
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true">
          <label class="md-nav__title" for="__nav_4">
            <span class="md-nav__icon md-icon"></span>
            Environments
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../coin_game/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Coin
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
    
  
  
  
    <li class="md-nav__item md-nav__item--active">
      
      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
      
      
        
      
      
        <label class="md-nav__link md-nav__link--active" for="__toc">
          
  
  <span class="md-ellipsis">
    Hanabi
  </span>
  

          <span class="md-nav__icon md-icon"></span>
        </label>
      
      <a href="./" class="md-nav__link md-nav__link--active">
        
  
  <span class="md-ellipsis">
    Hanabi
  </span>
  

      </a>
      
        

<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  
  
  
    
  
  
    <label class="md-nav__title" for="__toc">
      <span class="md-nav__icon md-icon"></span>
      Table of contents
    </label>
    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
      
        <li class="md-nav__item">
  <a href="#action-space" class="md-nav__link">
    <span class="md-ellipsis">
      Action Space
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#observation-space" class="md-nav__link">
    <span class="md-ellipsis">
      Observation Space
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#pretrained-models" class="md-nav__link">
    <span class="md-ellipsis">
      Pretrained Models
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#rendering" class="md-nav__link">
    <span class="md-ellipsis">
      Rendering
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#manual-game" class="md-nav__link">
    <span class="md-ellipsis">
      Manual Game
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#citation" class="md-nav__link">
    <span class="md-ellipsis">
      Citation
    </span>
  </a>
  
</li>
      
    </ul>
  
</nav>
      
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../jaxnav/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    JaxNav
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../mabrax/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Multi-Agent Brax
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../mpe/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    MPE
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../overcooked/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Overcooked
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../smax/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    SMAX
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../storm/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    STORM
  </span>
  

      </a>
    </li>
  

              
            
              
                
  
  
  
  
    <li class="md-nav__item">
      <a href="../switch_riddle/" class="md-nav__link">
        
  
  <span class="md-ellipsis">
    Switch Riddle
  </span>
  

      </a>
    </li>
  

              
            
          </ul>
        </nav>
      
    </li>
  

    
  </ul>
</nav>
                  </div>
                </div>
              </div>
            
            
              
              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
                <div class="md-sidebar__scrollwrap">
                  <div class="md-sidebar__inner">
                    

<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  
  
  
    
  
  
    <label class="md-nav__title" for="__toc">
      <span class="md-nav__icon md-icon"></span>
      Table of contents
    </label>
    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
      
        <li class="md-nav__item">
  <a href="#action-space" class="md-nav__link">
    <span class="md-ellipsis">
      Action Space
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#observation-space" class="md-nav__link">
    <span class="md-ellipsis">
      Observation Space
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#pretrained-models" class="md-nav__link">
    <span class="md-ellipsis">
      Pretrained Models
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#rendering" class="md-nav__link">
    <span class="md-ellipsis">
      Rendering
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#manual-game" class="md-nav__link">
    <span class="md-ellipsis">
      Manual Game
    </span>
  </a>
  
</li>
      
        <li class="md-nav__item">
  <a href="#citation" class="md-nav__link">
    <span class="md-ellipsis">
      Citation
    </span>
  </a>
  
</li>
      
    </ul>
  
</nav>
                  </div>
                </div>
              </div>
            
          
          
            <div class="md-content" data-md-component="content">
              <article class="md-content__inner md-typeset">
                
                  

  
  


<h1 id="hanabi">Hanabi</h1>
<p>This directory contains a MARL environment for the cooperative card game, Hanabi, implemented in JAX. It is inspired by the popular <a href="XXXX">Hanabi Learning Environment (HLE)</a>, but intended to be simpler to integrate and run with the growing ecosystem of JAX implemented RL research pipelines. </p>
<h2 id="action-space">Action Space</h2>
<p>Hanabi is a turn-based game. The current player can choose to discard or play any of the cards in their hand, or hint a colour or rank to any one of their teammates.</p>
<h2 id="observation-space">Observation Space</h2>
<p>The observations closely follow the featurization in the HLE. Each observation is comprised of 658 features:</p>
<ul>
<li><strong>Hands (127)</strong>: information about the visible hands.</li>
<li>other player hand: 125 <ul>
<li>card 0: 25</li>
<li>card 1: 25</li>
<li>card 2: 25</li>
<li>card 3: 25</li>
<li>card 4: 25</li>
</ul>
</li>
<li>
<p>Hands missing card: 2 (one-hot)</p>
</li>
<li>
<p><strong>Board (76)</strong>: encoding of the public information visible in the board.</p>
</li>
<li>Deck: 40, thermometer </li>
<li>Fireworks: 25, one-hot</li>
<li>Info Tokens: 8, thermometer</li>
<li>
<p>Life Tokens: 3, thermometer</p>
</li>
<li>
<p><strong>Discards (50)</strong>: encoding of the cards in the discard pile.</p>
</li>
<li>Colour R: 10 bits for each card</li>
<li>Colour Y: 10 bits for each card</li>
<li>Colour G: 10 bits for each card</li>
<li>Colour W: 10 bits for each card</li>
<li>
<p>Colour B: 10 bits for each card</p>
</li>
<li>
<p><strong>Last Action (55)</strong>: encoding of the last move of the previous player.</p>
</li>
<li>Acting player index, relative to yourself: 2, one-hot</li>
<li>MoveType: 4, one-hot</li>
<li>Target player index, relative to acting player: 2, one-hot</li>
<li>Color revealed: 5, one-hot</li>
<li>Rank revealed: 5, one-hot</li>
<li>Reveal outcome: 5 bits, each bit is 1 if the card was hinted at</li>
<li>Position played/discarded: 5, one-hot</li>
<li>Card played/discarded: 25, one-hot</li>
<li>Card played scored: 1</li>
<li>
<p>Card played added info token: 1</p>
</li>
<li>
<p><strong>V0 belief (350)</strong>: trivially-computed probability of being a specific card (given the played/discarded cards and the hints given), for each card of each player.</p>
</li>
<li>Possible Card (for each card): 25 (* 10)</li>
<li>Colour hinted (for each card): 5 (* 10)</li>
<li>Rank hinted (for each card): 5 (* 10)</li>
</ul>
<h2 id="pretrained-models">Pretrained Models</h2>
<p>We provide some pretrained models. For example, you can use a JAX conversion of the original R2D2 OBL model as follows:</p>
<ol>
<li>Download the models from Hugging Face: <code>git clone XXXX</code> (make sure git lfs is installed). You can also use the script: <code>bash jaxmarl/environments/hanabi/models/download_r2d2_obl.sh</code></li>
<li>Load the parameters, import the agent wrapper and use it with JaxMARL Hanabi:</li>
</ol>
<div class="language-python highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="err">!</span><span class="n">git</span> <span class="n">clone</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">huggingface</span><span class="o">.</span><span class="n">co</span><span class="o">/</span><span class="n">mttga</span><span class="o">/</span><span class="n">obl</span><span class="o">-</span><span class="n">r2d2</span><span class="o">-</span><span class="n">flax</span>
</span><span id="__span-0-2"><a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="kn">import</span> <span class="nn">jax</span>
</span><span id="__span-0-3"><a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="kn">from</span> <span class="nn">jax</span> <span class="kn">import</span> <span class="n">numpy</span> <span class="k">as</span> <span class="n">jnp</span>
</span><span id="__span-0-4"><a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="kn">from</span> <span class="nn">jaxmarl</span> <span class="kn">import</span> <span class="n">make</span>
</span><span id="__span-0-5"><a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="kn">from</span> <span class="nn">jaxmarl.wrappers.baselines</span> <span class="kn">import</span> <span class="n">load_params</span>
</span><span id="__span-0-6"><a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="kn">from</span> <span class="nn">jaxmarl.environments.hanabi.pretrained</span> <span class="kn">import</span> <span class="n">OBLAgentR2D2</span>
</span><span id="__span-0-7"><a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a>
</span><span id="__span-0-8"><a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a><span class="n">weight_file</span> <span class="o">=</span> <span class="s2">&quot;jaxmarl/environments/hanabi/pretrained/obl-r2d2-flax/icml_OBL1/OFF_BELIEF1_SHUFFLE_COLOR0_BZA0_BELIEF_a.safetensors&quot;</span>
</span><span id="__span-0-9"><a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a><span class="n">params</span> <span class="o">=</span> <span class="n">load_params</span><span class="p">(</span><span class="n">weight_file</span><span class="p">)</span>
</span><span id="__span-0-10"><a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a>
</span><span id="__span-0-11"><a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a><span class="n">agent</span> <span class="o">=</span> <span class="n">OBLAgentR2D2</span><span class="p">()</span>
</span><span id="__span-0-12"><a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a><span class="n">agent_carry</span> <span class="o">=</span> <span class="n">agent</span><span class="o">.</span><span class="n">initialize_carry</span><span class="p">(</span><span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="mi">0</span><span class="p">),</span> <span class="n">batch_dims</span><span class="o">=</span><span class="p">(</span><span class="mi">2</span><span class="p">,))</span>
</span><span id="__span-0-13"><a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a>
</span><span id="__span-0-14"><a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a><span class="n">rng</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
</span><span id="__span-0-15"><a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a><span class="n">env</span> <span class="o">=</span> <span class="n">make</span><span class="p">(</span><span class="s1">&#39;hanabi&#39;</span><span class="p">)</span>
</span><span id="__span-0-16"><a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a><span class="n">obs</span><span class="p">,</span> <span class="n">env_state</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">rng</span><span class="p">)</span>
</span><span id="__span-0-17"><a id="__codelineno-0-17" name="__codelineno-0-17" href="#__codelineno-0-17"></a><span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">env_state</span><span class="p">)</span>
</span><span id="__span-0-18"><a id="__codelineno-0-18" name="__codelineno-0-18" href="#__codelineno-0-18"></a>
</span><span id="__span-0-19"><a id="__codelineno-0-19" name="__codelineno-0-19" href="#__codelineno-0-19"></a><span class="n">batchify</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">jnp</span><span class="o">.</span><span class="n">stack</span><span class="p">([</span><span class="n">x</span><span class="p">[</span><span class="n">agent</span><span class="p">]</span> <span class="k">for</span> <span class="n">agent</span> <span class="ow">in</span> <span class="n">env</span><span class="o">.</span><span class="n">agents</span><span class="p">])</span>
</span><span id="__span-0-20"><a id="__codelineno-0-20" name="__codelineno-0-20" href="#__codelineno-0-20"></a><span class="n">unbatchify</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="p">{</span><span class="n">agent</span><span class="p">:</span><span class="n">x</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">agent</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">env</span><span class="o">.</span><span class="n">agents</span><span class="p">)}</span>
</span><span id="__span-0-21"><a id="__codelineno-0-21" name="__codelineno-0-21" href="#__codelineno-0-21"></a>
</span><span id="__span-0-22"><a id="__codelineno-0-22" name="__codelineno-0-22" href="#__codelineno-0-22"></a><span class="n">agent_input</span> <span class="o">=</span> <span class="p">(</span>
</span><span id="__span-0-23"><a id="__codelineno-0-23" name="__codelineno-0-23" href="#__codelineno-0-23"></a>    <span class="n">batchify</span><span class="p">(</span><span class="n">obs</span><span class="p">),</span>
</span><span id="__span-0-24"><a id="__codelineno-0-24" name="__codelineno-0-24" href="#__codelineno-0-24"></a>    <span class="n">batchify</span><span class="p">(</span><span class="n">env</span><span class="o">.</span><span class="n">get_legal_moves</span><span class="p">(</span><span class="n">env_state</span><span class="p">))</span>
</span><span id="__span-0-25"><a id="__codelineno-0-25" name="__codelineno-0-25" href="#__codelineno-0-25"></a><span class="p">)</span>
</span><span id="__span-0-26"><a id="__codelineno-0-26" name="__codelineno-0-26" href="#__codelineno-0-26"></a><span class="n">agent_carry</span><span class="p">,</span> <span class="n">actions</span> <span class="o">=</span> <span class="n">agent</span><span class="o">.</span><span class="n">greedy_act</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">agent_carry</span><span class="p">,</span> <span class="n">agent_input</span><span class="p">)</span>
</span><span id="__span-0-27"><a id="__codelineno-0-27" name="__codelineno-0-27" href="#__codelineno-0-27"></a><span class="n">actions</span> <span class="o">=</span> <span class="n">unbatchify</span><span class="p">(</span><span class="n">actions</span><span class="p">)</span>
</span><span id="__span-0-28"><a id="__codelineno-0-28" name="__codelineno-0-28" href="#__codelineno-0-28"></a>
</span><span id="__span-0-29"><a id="__codelineno-0-29" name="__codelineno-0-29" href="#__codelineno-0-29"></a><span class="n">obs</span><span class="p">,</span> <span class="n">env_state</span><span class="p">,</span> <span class="n">rewards</span><span class="p">,</span> <span class="n">done</span><span class="p">,</span> <span class="n">info</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step</span><span class="p">(</span><span class="n">rng</span><span class="p">,</span> <span class="n">env_state</span><span class="p">,</span> <span class="n">actions</span><span class="p">)</span>
</span><span id="__span-0-30"><a id="__codelineno-0-30" name="__codelineno-0-30" href="#__codelineno-0-30"></a>
</span><span id="__span-0-31"><a id="__codelineno-0-31" name="__codelineno-0-31" href="#__codelineno-0-31"></a><span class="nb">print</span><span class="p">(</span><span class="s1">&#39;actions:&#39;</span><span class="p">,</span> <span class="p">{</span><span class="n">agent</span><span class="p">:</span><span class="n">env</span><span class="o">.</span><span class="n">action_encoding</span><span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">a</span><span class="p">)]</span> <span class="k">for</span> <span class="n">agent</span><span class="p">,</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">actions</span><span class="o">.</span><span class="n">items</span><span class="p">()})</span>
</span><span id="__span-0-32"><a id="__codelineno-0-32" name="__codelineno-0-32" href="#__codelineno-0-32"></a><span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">env_state</span><span class="p">)</span>
</span></code></pre></div>
<h2 id="rendering">Rendering</h2>
<p>You can render the full environment state:</p>
<div class="language-python highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="n">obs</span><span class="p">,</span> <span class="n">env_state</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">reset</span><span class="p">(</span><span class="n">rng</span><span class="p">)</span>
</span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="n">env</span><span class="o">.</span><span class="n">render</span><span class="p">(</span><span class="n">env_state</span><span class="p">)</span>
</span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a>
</span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="n">Turn</span><span class="p">:</span> <span class="mi">0</span>
</span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a>
</span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="n">Score</span><span class="p">:</span> <span class="mi">0</span>
</span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="n">Information</span><span class="p">:</span> <span class="mi">8</span>
</span><span id="__span-1-8"><a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="n">Lives</span><span class="p">:</span> <span class="mi">3</span>
</span><span id="__span-1-9"><a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="n">Deck</span><span class="p">:</span> <span class="mi">40</span>
</span><span id="__span-1-10"><a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a><span class="n">Discards</span><span class="p">:</span>                                                  
</span><span id="__span-1-11"><a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a><span class="n">Fireworks</span><span class="p">:</span>     
</span><span id="__span-1-12"><a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a><span class="n">Actor</span> <span class="mi">0</span> <span class="n">Hand</span><span class="p">:</span><span class="o">&lt;--</span> <span class="n">current</span> <span class="n">player</span>
</span><span id="__span-1-13"><a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a><span class="mi">0</span> <span class="n">W3</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-14"><a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a><span class="mi">1</span> <span class="n">G5</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-15"><a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a><span class="mi">2</span> <span class="n">G4</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-16"><a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a><span class="mi">3</span> <span class="n">G1</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-17"><a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a><span class="mi">4</span> <span class="n">Y2</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-18"><a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a><span class="n">Actor</span> <span class="mi">1</span> <span class="n">Hand</span><span class="p">:</span>
</span><span id="__span-1-19"><a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a><span class="mi">0</span> <span class="n">R3</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-20"><a id="__codelineno-1-20" name="__codelineno-1-20" href="#__codelineno-1-20"></a><span class="mi">1</span> <span class="n">B1</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-21"><a id="__codelineno-1-21" name="__codelineno-1-21" href="#__codelineno-1-21"></a><span class="mi">2</span> <span class="n">G1</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-22"><a id="__codelineno-1-22" name="__codelineno-1-22" href="#__codelineno-1-22"></a><span class="mi">3</span> <span class="n">R4</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span><span id="__span-1-23"><a id="__codelineno-1-23" name="__codelineno-1-23" href="#__codelineno-1-23"></a><span class="mi">4</span> <span class="n">W4</span> <span class="o">||</span> <span class="n">XX</span><span class="o">|</span><span class="n">RYGWB12345</span>
</span></code></pre></div>
<p>Or you can render the partial observation of the current agent:</p>
<div class="language-python highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="n">obs</span><span class="p">,</span> <span class="n">new_env_state</span><span class="p">,</span> <span class="n">rewards</span><span class="p">,</span> <span class="n">dones</span><span class="p">,</span> <span class="n">infos</span>  <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">step_env</span><span class="p">(</span><span class="n">rng</span><span class="p">,</span> <span class="n">env_state</span><span class="p">,</span> <span class="n">actions</span><span class="p">)</span>
</span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="n">obs_s</span> <span class="o">=</span> <span class="n">env</span><span class="o">.</span><span class="n">get_obs_str</span><span class="p">(</span><span class="n">new_env_state</span><span class="p">,</span> <span class="n">env_state</span><span class="p">,</span> <span class="n">a</span><span class="p">,</span> <span class="n">include_belief</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">best_belief</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
</span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="nb">print</span><span class="p">(</span><span class="n">obs_s</span><span class="p">)</span>
</span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a>
</span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="n">Turn</span><span class="p">:</span> <span class="mi">1</span>
</span><span id="__span-2-6"><a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a>
</span><span id="__span-2-7"><a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a><span class="n">Score</span><span class="p">:</span> <span class="mi">0</span>
</span><span id="__span-2-8"><a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a><span class="n">Information</span> <span class="n">available</span><span class="p">:</span> <span class="mi">7</span>
</span><span id="__span-2-9"><a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a><span class="n">Lives</span> <span class="n">available</span><span class="p">:</span> <span class="mi">3</span>
</span><span id="__span-2-10"><a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a><span class="n">Deck</span> <span class="n">remaining</span> <span class="n">cards</span><span class="p">:</span> <span class="mi">40</span>
</span><span id="__span-2-11"><a id="__codelineno-2-11" name="__codelineno-2-11" href="#__codelineno-2-11"></a><span class="n">Discards</span><span class="p">:</span>                                                  
</span><span id="__span-2-12"><a id="__codelineno-2-12" name="__codelineno-2-12" href="#__codelineno-2-12"></a><span class="n">Fireworks</span><span class="p">:</span>     
</span><span id="__span-2-13"><a id="__codelineno-2-13" name="__codelineno-2-13" href="#__codelineno-2-13"></a><span class="n">Other</span> <span class="n">Hand</span><span class="p">:</span>
</span><span id="__span-2-14"><a id="__codelineno-2-14" name="__codelineno-2-14" href="#__codelineno-2-14"></a><span class="mi">0</span> <span class="n">Card</span><span class="p">:</span> <span class="n">W3</span><span class="p">,</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB12345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.060</span><span class="p">]</span>
</span><span id="__span-2-15"><a id="__codelineno-2-15" name="__codelineno-2-15" href="#__codelineno-2-15"></a><span class="mi">1</span> <span class="n">Card</span><span class="p">:</span> <span class="n">G5</span><span class="p">,</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB12345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.060</span><span class="p">]</span>
</span><span id="__span-2-16"><a id="__codelineno-2-16" name="__codelineno-2-16" href="#__codelineno-2-16"></a><span class="mi">2</span> <span class="n">Card</span><span class="p">:</span> <span class="n">G4</span><span class="p">,</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB12345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.060</span><span class="p">]</span>
</span><span id="__span-2-17"><a id="__codelineno-2-17" name="__codelineno-2-17" href="#__codelineno-2-17"></a><span class="mi">3</span> <span class="n">Card</span><span class="p">:</span> <span class="n">G1</span><span class="p">,</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB12345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.060</span><span class="p">]</span>
</span><span id="__span-2-18"><a id="__codelineno-2-18" name="__codelineno-2-18" href="#__codelineno-2-18"></a><span class="mi">4</span> <span class="n">Card</span><span class="p">:</span> <span class="n">Y2</span><span class="p">,</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB12345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.060</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.060</span><span class="p">]</span>
</span><span id="__span-2-19"><a id="__codelineno-2-19" name="__codelineno-2-19" href="#__codelineno-2-19"></a><span class="n">Your</span> <span class="n">Hand</span><span class="p">:</span>
</span><span id="__span-2-20"><a id="__codelineno-2-20" name="__codelineno-2-20" href="#__codelineno-2-20"></a><span class="mi">0</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span>  <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB2345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R3</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R4</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y3</span><span class="p">:</span> <span class="mf">0.057</span><span class="p">]</span>
</span><span id="__span-2-21"><a id="__codelineno-2-21" name="__codelineno-2-21" href="#__codelineno-2-21"></a><span class="mi">1</span> <span class="n">Hints</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB1</span><span class="p">,</span>    <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.200</span><span class="p">]</span>
</span><span id="__span-2-22"><a id="__codelineno-2-22" name="__codelineno-2-22" href="#__codelineno-2-22"></a><span class="mi">2</span> <span class="n">Hints</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB1</span><span class="p">,</span>    <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">Y1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">G1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">W1</span><span class="p">:</span> <span class="mf">0.200</span> <span class="n">B1</span><span class="p">:</span> <span class="mf">0.200</span><span class="p">]</span>
</span><span id="__span-2-23"><a id="__codelineno-2-23" name="__codelineno-2-23" href="#__codelineno-2-23"></a><span class="mi">3</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span>  <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB2345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R3</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R4</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y3</span><span class="p">:</span> <span class="mf">0.057</span><span class="p">]</span>
</span><span id="__span-2-24"><a id="__codelineno-2-24" name="__codelineno-2-24" href="#__codelineno-2-24"></a><span class="mi">4</span> <span class="n">Hints</span><span class="p">:</span> <span class="p">,</span>  <span class="n">Possible</span><span class="p">:</span> <span class="n">RYGWB2345</span><span class="p">,</span> <span class="n">Belief</span><span class="p">:</span> <span class="p">[</span><span class="n">R2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R3</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">R4</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y2</span><span class="p">:</span> <span class="mf">0.057</span> <span class="n">Y3</span><span class="p">:</span> <span class="mf">0.057</span><span class="p">]</span>
</span><span id="__span-2-25"><a id="__codelineno-2-25" name="__codelineno-2-25" href="#__codelineno-2-25"></a><span class="n">Last</span> <span class="n">action</span><span class="p">:</span> <span class="n">H1</span>
</span><span id="__span-2-26"><a id="__codelineno-2-26" name="__codelineno-2-26" href="#__codelineno-2-26"></a><span class="n">Cards</span> <span class="n">afected</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span> <span class="mi">2</span><span class="p">]</span>
</span><span id="__span-2-27"><a id="__codelineno-2-27" name="__codelineno-2-27" href="#__codelineno-2-27"></a><span class="n">Legal</span> <span class="n">Actions</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;D0&#39;</span><span class="p">,</span> <span class="s1">&#39;D1&#39;</span><span class="p">,</span> <span class="s1">&#39;D2&#39;</span><span class="p">,</span> <span class="s1">&#39;D3&#39;</span><span class="p">,</span> <span class="s1">&#39;D4&#39;</span><span class="p">,</span> <span class="s1">&#39;P0&#39;</span><span class="p">,</span> <span class="s1">&#39;P1&#39;</span><span class="p">,</span> <span class="s1">&#39;P2&#39;</span><span class="p">,</span> <span class="s1">&#39;P3&#39;</span><span class="p">,</span> <span class="s1">&#39;P4&#39;</span><span class="p">,</span> <span class="s1">&#39;HY&#39;</span><span class="p">,</span> <span class="s1">&#39;HG&#39;</span><span class="p">,</span> <span class="s1">&#39;HW&#39;</span><span class="p">,</span> <span class="s1">&#39;H1&#39;</span><span class="p">,</span> <span class="s1">&#39;H2&#39;</span><span class="p">,</span> <span class="s1">&#39;H3&#39;</span><span class="p">,</span> <span class="s1">&#39;H4&#39;</span><span class="p">,</span> <span class="s1">&#39;H5&#39;</span><span class="p">]</span>
</span></code></pre></div>
<h2 id="manual-game">Manual Game</h2>
<p>You can test the environment and your models by using the <code>manual_game.py</code> script in this folder. It allows to control one or two agents with the keyboard and one or two agents with a pretrained model (an obl model by default). For example, to play with an obl pretrained model:</p>
<div class="language-text highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>python manual_game.py \
</span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a>  --player0 &quot;manual&quot; \
</span><span id="__span-3-3"><a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a>  --player1 &quot;obl&quot; \
</span><span id="__span-3-4"><a id="__codelineno-3-4" name="__codelineno-3-4" href="#__codelineno-3-4"></a>  --weight1 &quot;./pretrained/obl-r2d2-flax/icml_OBL1/OFF_BELIEF1_SHUFFLE_COLOR0_BZA0_BELIEF_a.safetensors&quot; \
</span></code></pre></div>
<p>Or to look an obl model playing with itself:</p>
<div class="language-text highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>python manual_game.py \
</span><span id="__span-4-2"><a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a>  --player0 &quot;obl&quot; \
</span><span id="__span-4-3"><a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a>  --player1 &quot;obl&quot; \
</span><span id="__span-4-4"><a id="__codelineno-4-4" name="__codelineno-4-4" href="#__codelineno-4-4"></a>  --weight0 &quot;./pretrained/obl-r2d2-flax/icml_OBL1/OFF_BELIEF1_SHUFFLE_COLOR0_BZA0_BELIEF_a.safetensors&quot; \
</span><span id="__span-4-5"><a id="__codelineno-4-5" name="__codelineno-4-5" href="#__codelineno-4-5"></a>  --weight1 &quot;./pretrained/obl-r2d2-flax/icml_OBL1/OFF_BELIEF1_SHUFFLE_COLOR0_BZA0_BELIEF_a.safetensors&quot; \
</span></code></pre></div>
<h2 id="citation">Citation</h2>
<p>The environment was orginally described in the following work:
<div class="language-text highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a>@article{bard2019hanabi,
</span><span id="__span-5-2"><a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a>  title={The Hanabi Challenge: A New Frontier for AI Research},
</span><span id="__span-5-3"><a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a>  author={Bard, Nolan and Foerster, Jakob N. and Chandar, Sarath and Burch, Neil and Lactot, Marc and Song,    H. Francis and Parisotto, Emilio and Dumoulin, Vincent and Moitra, Subhodeep and Hughes, Edward and          Dunning, Ian and Mourad, Shibl and Larochelle, Hugo and Bellemare, Marc G. and Bowling},
</span><span id="__span-5-4"><a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a>  journal={Artificial Intelligence Journal},
</span><span id="__span-5-5"><a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a>  year={2019}
</span><span id="__span-5-6"><a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a>}
</span></code></pre></div></p>












                
              </article>
            </div>
          
          
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
        </div>
        
      </main>
      
        <footer class="md-footer">
  
  <div class="md-footer-meta md-typeset">
    <div class="md-footer-meta__inner md-grid">
      <div class="md-copyright">
  
  
    Made with
    <a href="XXXX" target="_blank" rel="noopener">
      Material for MkDocs
    </a>
  
</div>
      
    </div>
  </div>
</footer>
      
    </div>
    <div class="md-dialog" data-md-component="dialog">
      <div class="md-dialog__inner md-typeset"></div>
    </div>
    
    
    <script id="__config" type="application/json">{"base": "../..", "features": ["navigation.sections"], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
    
    
      <script src="../../assets/javascripts/bundle.83f73b43.min.js"></script>
      
    
  </body>
</html>