# Scenario configuration for a marshmallow delayed-gratification simulation.
# NOTE(review): `version` is an unquoted 0.1, so YAML loads it as a float
# rather than a string — confirm that is what the consumer expects.
version: 0.1
id: marshmallow_experiment
description: A simulation of the Stanford Marshmallow Experiment using the standard TextEnv.
# The same name appears as the {max_steps} placeholder in rules_prompt and
# final_resolution_prompt, where it is worded as a number of minutes.
max_steps: 20 # The number of turns each agent must wait to "win".
# NOTE(review): presumably the number of questions an agent may ask during a
# run; the consumer of this value is not visible in this file — confirm.
question_budget: 3

# Instruction template with two variants, `broadcast` and `isolated`.
# Both variants currently hold the same three placeholders; they are kept as
# separate keys so each can be changed independently.
# NOTE(review): the placeholders are presumably filled in by the loader from
# the experiment conditions — confirm against the consuming code.
shared_instruction_prompt:
  broadcast: >
    {communication_type} {reward_visibility} {distraction}
  isolated: >
    {communication_type} {reward_visibility} {distraction}

# Single-entry list holding a two-placeholder template.
# The entry is double-quoted on purpose: a plain scalar that starts with `{`
# would be parsed as a YAML flow mapping instead of a string.
common_reflection_knowledge:
  - "{persona_age} {hedonic}"

# Rules of the experiment, one list entry per rule.
# The phrase "I eat the marshmallow" is repeated verbatim in answer_format and
# in reward_scheme.success_keyword; keep all three in sync when editing.
# {max_steps} shares its name with the top-level max_steps key.
rules_prompt:
  - You can choose to eat your single marshmallow at any time by saying "I eat the marshmallow".
  - If you eat the marshmallow your waiting is over.
  - However, if you can wait for {max_steps} minutes without eating it, you will receive 2 marshmallows.

# Optional prompt fragment about the question budget.
# NOTE(review): `status: false` presumably means the fragment is not shown to
# the agent — confirm against the consuming code.
visible_question_budget:
  status: false
  # Folded scalar: loads as the placeholder followed by a single newline.
  # Trailing spaces inside a block scalar become part of the value, so none
  # are left after the placeholder.
  content: >
    {visible_question_budget}

# Template consisting solely of a placeholder with the same name as the key.
# Folded scalar: loads as the placeholder followed by a single newline.
tool_use_policy: >
  {tool_use_policy}

# No content lines follow the folded-scalar indicator, so this key loads as
# the empty string "".
# NOTE(review): confirm the empty prompt is intentional and not a placeholder
# that was never filled in.
behavioral_prompt: >

# Two fixed environment messages, one for ordinary steps and one for the last.
# Each is a folded scalar and therefore loads with a single trailing newline.
# NOTE(review): presumably `common_step` is sent on every turn and `final_step`
# once max_steps is reached — confirm against the consuming code.
environment_question:
  common_step: >
    What will you do?
  final_step: >
    You won!

# Restricts the agent's answer to one of two literal values. The first value is
# the same phrase used in rules_prompt and reward_scheme.success_keyword.
# NOTE(review): `{Answer: <value>}` is literal text, but it uses the same brace
# syntax as the {placeholder} fields elsewhere in this file. If this string is
# ever passed through brace-based substitution (e.g. Python str.format) the
# braces would need escaping as `{{Answer: <value>}}` — confirm how the loader
# renders this field.
answer_format: >
  CRITICAL RULE: In your JSON {Answer: <value>} you must only provide exactly one of the following values: "I eat the marshmallow" | "I wait".

# Messages present at the start of the run; currently a single user message.
initial_messages:
  - role: user
    content: "The experiment has now begun. Good luck."
    # Quoted because a bare * would be read as a YAML alias indicator.
    # NOTE(review): "*" presumably addresses every agent — confirm.
    to: ["*"]

# Closing message for the waiting outcome. {max_steps} shares its name with the
# top-level max_steps key. "One more" is on top of the single marshmallow
# mentioned in rules_prompt, giving the 2 marshmallows promised there.
# Folded scalar: loads as one line followed by a single newline.
final_resolution_prompt: >
  You have waited for {max_steps} minutes and receive one more marshmallow.


# Reward settings.
# NOTE(review): the semantics below are inferred from the key names and the
# rules_prompt text, not from the consuming code — confirm before relying on
# them.
reward_scheme:
  # Presumably means the two rewards are mutually exclusive.
  type: exclusive
  # Same phrase as in rules_prompt and answer_format; keep them in sync.
  success_keyword: "I eat the marshmallow"
  # Presumably granted when the keyword is matched (the single marshmallow).
  reward: 1.0
  # Presumably granted for waiting the full max_steps (the 2 marshmallows).
  final_reward: 2.0

# Time-stamp settings for prompts.
prompt_time_stamping:
  enabled: true
  # Double-quoted so that \n is an actual newline at the end of the stamp.
  # NOTE(review): {time} and {max_time} are presumably filled from the current
  # step and max_steps — confirm against the consuming code.
  template: "Minute {time} of {max_time} has passed.\n"
  # NOTE(review): presumably the number of simulated minutes per step; with a
  # value of 1, "turns" and "minutes" coincide in the prompts.
  time_step_minutes: 1