# Provenance of this environment entry: its name, a source URL, an upstream
# source URL, and a paper URL.
origin:
  name: rlupus
  # NOTE(review): example.com looks like a placeholder URL - confirm the
  # intended value.
  source: https://example.com
  upstream_source: https://github.com/nicofirst1/rl_werewolf
  paper: https://arxiv.org/abs/2106.05018
# Descriptive properties of the environment, followed by one entry per
# recorded run under `variants`.
system:
  game_type: conversation
  game_subtype: social_deduction
  observation_type: vector
  observation_continuous: false
  multi_step: true
  data_source: null
  seeding_available: false
  symmetric_agents: true
  more_than_2_agents: true
  multi_utterance: true
  # Variant keys follow the pattern `<N>-player.run-<i>`. In every entry below,
  # message_length and vocab_size both equal N. `note` is free text recording
  # the win rate (WR) reached and the training time for that run.
  variants:
    9-player.run-0:
      message_length: 9
      vocab_size: 9
      note: reached 0.88 WR (win rate) in 38 hours (no automatic stopping)
    9-player.run-1:
      message_length: 9
      vocab_size: 9
      note: reached 0.44 WR, stopped at 48 hours
    9-player.run-2:
      message_length: 9
      vocab_size: 9
      note: reached 0.44 WR, stopped at 48 hours
    9-player.run-3:
      message_length: 9
      vocab_size: 9
      # NOTE(review): the other low-WR runs stopped at 48 hours; confirm that
      # 37 hours is intended here.
      note: reached 0.44 WR, stopped at 37 hours
    21-player.run-0:
      message_length: 21
      vocab_size: 21
      note: reached 0.88 WR in 34 hours (no automatic stopping)
    21-player.run-1:
      message_length: 21
      vocab_size: 21
      note: reached 0.55 WR, stopped at 48 hours
    21-player.run-2:
      message_length: 21
      vocab_size: 21
      # NOTE(review): unlike the other high-WR runs, this note does not say
      # whether the run was stopped automatically - confirm.
      note: reached 0.85 WR in 28 hours
notes: >
  This environment is somewhat unstable and does not provide random seeding, so
  reproducing exact results is tricky. Nevertheless, the environment converges
  about half of the time (i.e., it reaches an 85% villager win rate within 48
  hours of training).
  The 9-player variant runs at ~11M steps per hour.
  The 21-player variant runs at ~4M steps per hour.
