# The name of your experiment
MyExperiment:
  # Number of rounds to be processed simultaneously
  batch_size: 1
  # Number of speeches that each debater will give per round
  num_speeches: 1
  # Whether each debater will debate each side of the debate
  flip: true
  # Whether self-play is allowed, or just cross-play
  enable_self_debate: true
  # The specific speech ordering you want to use. Defaults to default_debate.
  # The other option at the moment is default_consultancy.
  speech_structure: default_debate
  # Config that lets you reuse information from previous runs
  previous_run:
    # Path to the previous run you want to use
    file_path: path-to-previous-run
    # Whether you want to use the same questions as the previous run
    # (useful for making performance comparisons)
    replicate_topics: true
    # Whether you want to merge the results of the previous run into the
    # final results csv and graphs
    merge_results: false
  # Specifies which set of prompts to use. If not specified, the defaults
  # will be used.
  prompt_config:
    # Path to the prompts file
    file_path: /path/to/source/code/prompts/configs/prompts.yaml
    # Name of the specific set of prompts to use
    default_prompt_name: Base Prompt
    # Whether one wants to hard code the topics to debate
    use_hardcoded_topics: false
    # The specific config that is used if use_hardcoded_topics is true
    hardcoded_topic_config: null
  # The configuration for the debaters and judges that will be participating
  # in the debate round
  agents:
    # The configuration for the debaters. You must specify at least 1. If only
    # 1 is specified, the model will debate itself. If more than 2 are
    # specified, then a round robin will be performed.
    debaters:
      # Specifies the configuration of the model this debater will use
      - model_settings:
          # The type of model that is to be used. See the README for a
          # comprehensive list of options.
          model_type: llama
          # The path to the model weights. Not needed if the model doesn't
          # have weights.
          model_file_path: /path/to/llama/directory
          # A name to identify the model. If this name is duplicated, then
          # stats for those debaters will be aggregated.
          alias: model-name
          # Whether quotes generated with quote tags should be validated
          # (defaults to true)
          require_quote_validation: true
          # Optional configuration that controls the LLM generations
          generation_params:
            # Max number of new tokens to generate in each speech,
            # defaults to 300
            max_new_tokens: 300
            # Temperature for generation, defaults to 0.5
            temperature: 0.5
            # Top p for generation, defaults to 1.0
            top_p: 1.0
            # Penalty for repetition, defaults to 1.2. Written out explicitly
            # (a bare `repetition_penalty:` parses as null). It is disabled
            # unless use_generation_penalties is true.
            repetition_penalty: 1.2
            # Whether to sample from the model (true) or do greedy
            # decoding (false)
            do_sample: true
            # Whether to enable (true) or disable (false) the
            # repetition_penalty and the exponential length penalty
            use_generation_penalties: false
          # The path to the base model. Optional (even if one is using a
          # peft model) but speeds up inference.
          peft_base_model: /path/to/peft/base_model
      - model_settings:
          # The type of model for the second debater
          model_type: deterministic
          # Alias for the second debater
          alias: model-name-2
          # The name of the prompt to use for this debater only
          override_prompt: override-prompt-name
          # The decoding strategy (true -> nucleus sampling [default],
          # false -> beam search)
          nucleus: true
          # Whether the model being used should be converted into a human
          # debate model (which uses the real transcripts)
          is_human: false
        best_of_n:
          # The number of samples to draw
          n: 4
          # The number of opponent samples to test against
          opponent_n: 2
          # Whether to select the speech with the best minimum score or the
          # best average score
          maxmin: true
          # Whether to rejudge each speech to see which is the best
          recompute: false
      - model_settings:
          # The type of the third debater
          model_type: random
          # The name of the third debater
          alias: random-model
          # Either the timestamp of the offline debate run (if the transcript
          # is in the default output folder) or the full path to all the
          # offline debates. Only needed if one is recreating a
          # previously-run debate.
          offline_file_path: offline-data-prefix
        scratchpad:
          # Whether the debater gets access to a scratchpad
          # (defaults to false)
          use_scratchpad: true
          # Number of words that the debater can use a scratchpad for
          # (0 or null disables scratchpad usage)
          scratchpad_word_limit: 100
          # Whether the scratchpad generations are exposed to the other
          # participants
          scratchpad_public: false
    judge:
      model_settings:
        # The model type for the judge
        model_type: llama
        # The path to the model weights. Not needed if the model doesn't
        # have weights.
        model_file_path: /path/to/model/weights
        # Name of the judging model
        alias: judge-alias
  # Configuration for the dataset to be used
  dataset:
    # The name of the dataset
    dataset_type: quality_debates
    # Path to a file that stores the entire dataset. Not needed if the
    # dataset is pre-split or if defaults are used.
    full_dataset_file_path: null
    # Path to a file that stores the training dataset. Not needed if the
    # dataset is not pre-split or if defaults are used.
    train_file_path: null
    # Path to a file that stores the validation dataset. Not needed if the
    # dataset is not pre-split or if defaults are used.
    val_file_path: null
    # Path to a file that stores the test dataset. Not needed if the
    # dataset is not pre-split or if defaults are used.
    test_file_path: null
    # Set of additional kwargs that are specific to an individual dataset
    supplemental_file_paths: null
    # The split of the data to actually use
    split_type: train
    # Whether the order of the rounds should be shuffled (false) or
    # fixed (true). Defaults to false.
    shuffle_deterministically: false
  # A configuration that lets you specify the type of tournament you want to
  # run. Not needed if you're using a round robin.
  tournament:
    # Other options include round_robin (default), capped round robin,
    # replication, and self_play_only.
    tournament_type: custom
    # For the custom tournament type specifically, these are the pairs of
    # aliases that you want to debate against each other. Each name is
    # expected to be an `alias` declared for a debater under agents.debaters
    # (here: model-name, model-name-2 and random-model).
    custom_matchups:
      - ["model-name", "model-name-2"]
      - ["model-name", "random-model"]