lojban-validation-debaters-0809:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  prompt_config:
    file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/prompts/configs/lojban_prompts.yaml
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-rl-round-2-240-0810-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 500
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset.jsonl
    dataset_type: lojban
    split_type: val
lojban-debaters-llama-sfted-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  prompt_config:
    file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/prompts/configs/lojban_prompts.yaml
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 500
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset_test.jsonl
    dataset_type: lojban
    split_type: test
lojban-debaters-llama-rled-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  prompt_config:
    file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/prompts/configs/lojban_prompts.yaml
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-rl-round-2-240-0810-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 500
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset_test.jsonl
    dataset_type: lojban
    split_type: test
lojban-debaters-o4-mini-untrained-judge-nano-sfted:
    batch_size: 1
    num_speeches: 2
    flip: False
    enable_self_debate: True
    speech_structure: default_debate
    alternate: False
    prompt_config:
      file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/prompts/configs/lojban_prompts.yaml
    agents:
      debaters:
        - model_settings:
            model_type: openai
            alias: openai-debater-o4-untrained
            model_file_path: o4-mini-2025-04-16
            require_quote_validation: True
            generation_params:
              temperature: 0.5
              max_new_tokens: 2000
      judge:
        model_settings:
          model_type: openai
          alias: openai-judge-nano-sft-0916
          model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
    dataset:
      full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset_test.jsonl
      dataset_type: lojban
      split_type: test
lojban-debaters-o4-mini-trained-judge-nano-sfted:
    batch_size: 1
    num_speeches: 2
    flip: False
    enable_self_debate: True
    speech_structure: default_debate
    alternate: False
    prompt_config:
      file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/prompts/configs/lojban_prompts.yaml
    agents:
      debaters:
        - model_settings:
            model_type: openai
            alias: openai-debater-o4-trained
            model_file_path: ft:o4-mini-2025-04-16:modulo-research-ltd:michael-and-khan-data-debater-15-09:CGO8p7XH
            require_quote_validation: True
            generation_params:
              temperature: 0.5
              max_new_tokens: 2000
      judge:
        model_settings:
          model_type: openai
          alias: openai-judge-nano-sft-0916
          model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
    dataset:
      full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset_test.jsonl
      dataset_type: lojban
      split_type: test
lojban-test-extended-tokens:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-rl-round-2-240-0810-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 500
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/data/datasets/lojban/lojban_dataset.jsonl
    dataset_type: lojban
    split_type: val
debater-untrained-judge-untrained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/downloaded-models/gradientai/Llama-3-8B-Instruct-262k
          alias: "untrained-mega-llama"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: gpt-4.1-2025-04-14
  dataset:
    dataset_type: quality
    split_type: val
debater-untrained-judge-trained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/downloaded-models/gradientai/Llama-3-8B-Instruct-262k
          alias: "untrained-mega-llama"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-41-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge:Brlimq17:ckpt-step-1290
  dataset:
    dataset_type: quality
    split_type: val
debater-untrained-gpt-o4-mini-0913-judge-trained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: openai
          alias: openai-o4-debater
          model_file_path: o4-mini-2025-04-16
    judge:
      model_settings:
        model_type: openai
        alias: openai-41-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: val
quality-debaters-llama-sfted-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: val
quality-debaters-llama-rled-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-dpo-round-2-nano-judge-0917/checkpoint-384
          alias: "dpo-rl-round-2-384-0919-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: val
quality-debaters-o4-mini-untrained-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: openai
          alias: openai-debater-o4-untrained
          model_file_path: o4-mini-2025-04-16
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 1400
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: val
quality-debaters-o4-mini-trained-judge-nano-sfted:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: openai
          alias: openai-debater-o4-trained
          model_file_path: ft:o4-mini-2025-04-16:modulo-research-ltd:michael-and-khan-data-debater-15-09:CGO8p7XH
          require_quote_validation: True
          generation_params:
            temperature: 0.5
            max_new_tokens: 1400
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-nano-sft-0916
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: val
debater-sft-trained-0731-judge-untrained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: gpt-4.1-2025-04-14
  dataset:
    dataset_type: quality
    split_type: val
debater-rl-trained-0711-judge-untrained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-drpo-round1
          alias: "sft-rl-0711-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: gpt-4.1-2025-04-14
  dataset:
    dataset_type: quality
    split_type: val
debater-gpt-oss-untrained-judge-trained-0731:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: open_weights_openai
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/downloaded-models/openai/gpt-oss-20b
          alias: "gpt-oss-20b-untrained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: val
debater-gpt-oss-trained-0822-judge-41-sft-0731:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: open_weights_openai
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/gpt_oss_20b_lora/lora_adapter
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/gpt_oss_20b_lora/merged
          alias: "gpt-oss-20b-lora-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-41-mini-judge-sft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: val
debater-sft-trained-0731-judge-trained-0731:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-rl-0731-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: val
debater-round2-rl-trained-0808-judge-trained-0731:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-rl-round-2-240-0810-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: val
debater-trained-judge-trained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged
          alias: "leo-sft-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-ckpt-1290-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge:Brlimq17:ckpt-step-1290
  dataset:
    dataset_type: quality
    split_type: val
debater-cross-play-sft-0731-rl-0810:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-rl-round-2-240-0810-model"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
#          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["sft-0731-model", "dpo-rl-round-2-240-0810-model"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-1:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-16"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-2:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-32"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-3:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-48"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-4:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-64
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-64"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-5:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-80"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-6:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-96
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-96"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-7:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-96
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-96"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-8:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-112"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-9:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-128
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-128"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-10:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-144"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-11:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-160
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-160"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-12:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-176"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-13:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-192
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-192"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-14:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-208"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-round1-15:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-224
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "dpo-round-1-checkpoint-224"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-1:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "dpo-round-1-full-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-2:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-32
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-32-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-3:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-64
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-64-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-4:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-96
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-96-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-5:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-128
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-128-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-6:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-160
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-160-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-7:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-192
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-192-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-8:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-224
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-224-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-9:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-256
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-256-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-10:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-288
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-288-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-11:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-320
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-320-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-12:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-352
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-352-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-13:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-384
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-384-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-14:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-416
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-416-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-15:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-468
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/
          alias: "dpo-round-2-checkpoint-468-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822-16:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-full-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
debater-cross-play-sft-0731-rl-0810-checkpoint-replication-0822:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "dpo-round-1-full-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-32
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-32-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-64
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-64-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-96
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-96-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-128
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-128-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-160
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-160-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-192
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-192-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-224
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-224-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-256
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-256-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-288
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-288-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-320
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-320-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-352
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-352-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-384
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-384-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-416
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-416-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-448
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-448-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained/checkpoint-468
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-checkpoint-468-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-811-FullTrainDebateRoundTwo-full-trained
          alias: "dpo-round-2-full-trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge-ft-0731
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["dpo-round-1-full-trained", "dpo-round-1-full-trained"]
      - ["dpo-round-2-checkpoint-32-trained", "dpo-round-2-checkpoint-32-trained"]
      - ["dpo-round-2-checkpoint-64-trained", "dpo-round-2-checkpoint-64-trained"]
      - ["dpo-round-2-checkpoint-96-trained", "dpo-round-2-checkpoint-96-trained"]
      - ["dpo-round-2-checkpoint-128-trained", "dpo-round-2-checkpoint-128-trained"]
      - ["dpo-round-2-checkpoint-160-trained", "dpo-round-2-checkpoint-160-trained"]
      - ["dpo-round-2-checkpoint-192-trained", "dpo-round-2-checkpoint-192-trained"]
      - ["dpo-round-2-checkpoint-224-trained", "dpo-round-2-checkpoint-224-trained"]
      - ["dpo-round-2-checkpoint-256-trained", "dpo-round-2-checkpoint-256-trained"]
      - ["dpo-round-2-checkpoint-288-trained", "dpo-round-2-checkpoint-288-trained"]
      - ["dpo-round-2-checkpoint-320-trained", "dpo-round-2-checkpoint-320-trained"]
      - ["dpo-round-2-checkpoint-352-trained", "dpo-round-2-checkpoint-352-trained"]
      - ["dpo-round-2-checkpoint-384-trained", "dpo-round-2-checkpoint-384-trained"]
      - ["dpo-round-2-checkpoint-416-trained", "dpo-round-2-checkpoint-416-trained"]
      - ["dpo-round-2-checkpoint-448-trained", "dpo-round-2-checkpoint-448-trained"]
      - ["dpo-round-2-checkpoint-468-trained", "dpo-round-2-checkpoint-468-trained"]
      - ["dpo-round-2-full-trained", "dpo-round-2-full-trained"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
Data Generation - Llama3 - SingleRound - FullTrain - SFT:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged
          alias: "t1-sft"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge:Brlimq17:ckpt-step-1290
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
Data Generation - Llama3 - SingleRound - FullTrain - Iter2:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-902-OneIterOneTurnDebate
          alias: "t1-dpo"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
Data Generation - Llama3 - SingleRound - FullTrain - SFT - Consultancy:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  multi_round_branching: full
  speech_structure: default_consultancy
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-consultant
          alias: "t1-sft-consultancy"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
DataGenerationLlama3MultiRoundHalfBranchedFullTrainDPO-0731:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    #          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
data-generation-llama-sfted-gpt-41-nano-judge-sfted:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "sft-0731"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-nano-sft-judge
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
data-generation-llama-sfted-gpt-41-nano-judge-sfted-round-two:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-dpo-round-1-nano-judge-0917
          peft_base_model: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged-no-judge-speeches-31.07
          alias: "llama-dpo-round-1-nano-judge-0917"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-nano-sft-judge
        model_file_path: ft:gpt-4.1-nano-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-nano-16-09:CGRIpeD6
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
DataGenerationLlama3MultiRoundHalfBranchedFullTrainDPO-0731-RoundTwo:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  multi_round_branching: half
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /lambda/nfs/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-DPO-0731-FullTrainDebateR2Only-test
          alias: "rl-round-one-0807"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    #          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4.1-2025-04-14:modulo-research-ltd:michael-and-khan-data-judge-31-07:BzYGc8SU
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
Data Generation - Llama3 - MultiRound - HalfBranched - FullTrain - Consultancy:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  multi_round_branching: full
  speech_structure: default_consultancy
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-224
          alias: "r1-dpo-consultant"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
Data Generation - Llama3 - OneTurn - Branched - FullTrain - TwoIter - Consultancy:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  multi_round_branching: full
  speech_structure: default_consultancy
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-903-ConsultantSingleTrainFull
          alias: "r1-dpo-t1-consultant"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: train
    flip_sides: False
val - experiment debate - 16:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 32:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 48:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 64:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-64
          alias: "experiment debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 80:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 96:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-96
          alias: "experiment debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 112:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 128:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-128
          alias: "experiment debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 144:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 160:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-160
          alias: "experiment debate - 160.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 176:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 192:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-192
          alias: "experiment debate - 192.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 208:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 224:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-224
          alias: "experiment debate - 224.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 240:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 256:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-256
          alias: "experiment debate - 256.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 272:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-272
          alias: "experiment debate - 272.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 288:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 304:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-304
          alias: "experiment debate - 304.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 320:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-320
          alias: "experiment debate - 320.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 336:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 352:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-352
          alias: "experiment debate - 352.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 368:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-368
          alias: "experiment debate - 368.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 384:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 400:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-400
          alias: "experiment debate - 400.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 416:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-416
          alias: "experiment debate - 416.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 432:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 448:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-448
          alias: "experiment debate - 448.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 464:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 16:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-16
          alias: "experiment consultant - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 32:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-32
          alias: "experiment consultant - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 48:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-48
          alias: "experiment consultant - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 64:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-64
          alias: "experiment consultant - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 80:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-80
          alias: "experiment consultant - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 96:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-96
          alias: "experiment consultant - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 112:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-112
          alias: "experiment consultant - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 128:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-128
          alias: "experiment consultant - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 144:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-144
          alias: "experiment consultant - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 160:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-160
          alias: "experiment consultant - 160.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 176:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-176
          alias: "experiment consultant - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 192:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-192
          alias: "experiment consultant - 192.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 208:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-208
          alias: "experiment consultant - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 224:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-224
          alias: "experiment consultant - 224.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 240:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-240
          alias: "experiment consultant - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 256:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-256
          alias: "experiment consultant - 256.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 272:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-272
          alias: "experiment consultant - 272.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 288:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-288
          alias: "experiment consultant - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 304:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-304
          alias: "experiment consultant - 304.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 320:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-320
          alias: "experiment consultant - 320.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 336:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-336
          alias: "experiment consultant - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 352:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-352
          alias: "experiment consultant - 352.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 368:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-368
          alias: "experiment consultant - 368.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 384:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-384
          alias: "experiment consultant - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 400:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-400
          alias: "experiment consultant - 400.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 416:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-416
          alias: "experiment consultant - 416.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-432
          alias: "experiment consultant - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 448:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-448
          alias: "experiment consultant - 448.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-464
          alias: "experiment consultant - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
test - experiment consultant - 16:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-16
          alias: "experiment consultant - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 32:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-32
          alias: "experiment consultant - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 48:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-48
          alias: "experiment consultant - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 64:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-64
          alias: "experiment consultant - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 80:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-80
          alias: "experiment consultant - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 96:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-96
          alias: "experiment consultant - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 112:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-112
          alias: "experiment consultant - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 128:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-128
          alias: "experiment consultant - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 144:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-144
          alias: "experiment consultant - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 160:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-160
          alias: "experiment consultant - 160.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 176:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-176
          alias: "experiment consultant - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 192:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-192
          alias: "experiment consultant - 192.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 208:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-208
          alias: "experiment consultant - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 224:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-224
          alias: "experiment consultant - 224.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 240:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-240
          alias: "experiment consultant - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 256:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-256
          alias: "experiment consultant - 256.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 272:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-272
          alias: "experiment consultant - 272.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 288:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-288
          alias: "experiment consultant - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 304:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-304
          alias: "experiment consultant - 304.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 320:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-320
          alias: "experiment consultant - 320.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 336:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-336
          alias: "experiment consultant - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 352:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-352
          alias: "experiment consultant - 352.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 368:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-368
          alias: "experiment consultant - 368.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 384:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-384
          alias: "experiment consultant - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 400:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-400
          alias: "experiment consultant - 400.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 416:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-416
          alias: "experiment consultant - 416.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-432
          alias: "experiment consultant - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 448:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-448
          alias: "experiment consultant - 448.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-ConsultantFullTrainR2/checkpoint-464
          alias: "experiment consultant - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
test - experiment consultant - sft:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-consultant
          alias: "experiment consultant - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: test
val - cross-play - 0 - 176 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 176.0"]
      - ["experiment debate - 0.0", "experiment debate - 464.0"]
      - ["experiment debate - 176.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 16 - 32:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 16.0"]
      - ["experiment debate - 0.0", "experiment debate - 32.0"]
      - ["experiment debate - 16.0", "experiment debate - 32.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 48 - 80:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 48.0"]
      - ["experiment debate - 0.0", "experiment debate - 80.0"]
      - ["experiment debate - 48.0", "experiment debate - 80.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 112 - 144:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 112.0"]
      - ["experiment debate - 0.0", "experiment debate - 144.0"]
      - ["experiment debate - 112.0", "experiment debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 208 - 240:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 208.0"]
      - ["experiment debate - 0.0", "experiment debate - 240.0"]
      - ["experiment debate - 208.0", "experiment debate - 240.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 288 - 336:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 288.0"]
      - ["experiment debate - 0.0", "experiment debate - 336.0"]
      - ["experiment debate - 288.0", "experiment debate - 336.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 384 - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 384.0"]
      - ["experiment debate - 0.0", "experiment debate - 432.0"]
      - ["experiment debate - 384.0", "experiment debate - 432.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 48 - 80:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 48.0"]
      - ["experiment debate - 16.0", "experiment debate - 80.0"]
      - ["experiment debate - 32.0", "experiment debate - 48.0"]
      - ["experiment debate - 32.0", "experiment debate - 80.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 112 - 144:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 112.0"]
      - ["experiment debate - 16.0", "experiment debate - 144.0"]
      - ["experiment debate - 32.0", "experiment debate - 112.0"]
      - ["experiment debate - 32.0", "experiment debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 176 - 208:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 176.0"]
      - ["experiment debate - 16.0", "experiment debate - 208.0"]
      - ["experiment debate - 32.0", "experiment debate - 176.0"]
      - ["experiment debate - 32.0", "experiment debate - 208.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 240 - 288:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 240.0"]
      - ["experiment debate - 16.0", "experiment debate - 288.0"]
      - ["experiment debate - 32.0", "experiment debate - 240.0"]
      - ["experiment debate - 32.0", "experiment debate - 288.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 336 - 384:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 336.0"]
      - ["experiment debate - 16.0", "experiment debate - 384.0"]
      - ["experiment debate - 32.0", "experiment debate - 336.0"]
      - ["experiment debate - 32.0", "experiment debate - 384.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 16 - 32 - 432 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 16.0", "experiment debate - 432.0"]
      - ["experiment debate - 16.0", "experiment debate - 464.0"]
      - ["experiment debate - 32.0", "experiment debate - 432.0"]
      - ["experiment debate - 32.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 48 - 80 - 112 - 144:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 48.0", "experiment debate - 112.0"]
      - ["experiment debate - 48.0", "experiment debate - 144.0"]
      - ["experiment debate - 80.0", "experiment debate - 112.0"]
      - ["experiment debate - 80.0", "experiment debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 48 - 80 - 176 - 208:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 48.0", "experiment debate - 176.0"]
      - ["experiment debate - 48.0", "experiment debate - 208.0"]
      - ["experiment debate - 80.0", "experiment debate - 176.0"]
      - ["experiment debate - 80.0", "experiment debate - 208.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 48 - 80 - 240 - 288:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 48.0", "experiment debate - 240.0"]
      - ["experiment debate - 48.0", "experiment debate - 288.0"]
      - ["experiment debate - 80.0", "experiment debate - 240.0"]
      - ["experiment debate - 80.0", "experiment debate - 288.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 48 - 80 - 336 - 384:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 48.0", "experiment debate - 336.0"]
      - ["experiment debate - 48.0", "experiment debate - 384.0"]
      - ["experiment debate - 80.0", "experiment debate - 336.0"]
      - ["experiment debate - 80.0", "experiment debate - 384.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 48 - 80 - 432 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 48.0", "experiment debate - 432.0"]
      - ["experiment debate - 48.0", "experiment debate - 464.0"]
      - ["experiment debate - 80.0", "experiment debate - 432.0"]
      - ["experiment debate - 80.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 112 - 144 - 176 - 208:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 112.0", "experiment debate - 176.0"]
      - ["experiment debate - 112.0", "experiment debate - 208.0"]
      - ["experiment debate - 144.0", "experiment debate - 176.0"]
      - ["experiment debate - 144.0", "experiment debate - 208.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 112 - 144 - 240 - 288:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 112.0", "experiment debate - 240.0"]
      - ["experiment debate - 112.0", "experiment debate - 288.0"]
      - ["experiment debate - 144.0", "experiment debate - 240.0"]
      - ["experiment debate - 144.0", "experiment debate - 288.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 112 - 144 - 336 - 384:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 112.0", "experiment debate - 336.0"]
      - ["experiment debate - 112.0", "experiment debate - 384.0"]
      - ["experiment debate - 144.0", "experiment debate - 336.0"]
      - ["experiment debate - 144.0", "experiment debate - 384.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 112 - 144 - 432 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 112.0", "experiment debate - 432.0"]
      - ["experiment debate - 112.0", "experiment debate - 464.0"]
      - ["experiment debate - 144.0", "experiment debate - 432.0"]
      - ["experiment debate - 144.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 176 - 208 - 288 - 336:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 176.0", "experiment debate - 288.0"]
      - ["experiment debate - 176.0", "experiment debate - 336.0"]
      - ["experiment debate - 208.0", "experiment debate - 288.0"]
      - ["experiment debate - 208.0", "experiment debate - 336.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 176 - 208 - 384 - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 176.0", "experiment debate - 384.0"]
      - ["experiment debate - 176.0", "experiment debate - 432.0"]
      - ["experiment debate - 208.0", "experiment debate - 384.0"]
      - ["experiment debate - 208.0", "experiment debate - 432.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 240 - 288 - 336:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 240.0", "experiment debate - 288.0"]
      - ["experiment debate - 240.0", "experiment debate - 336.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 208 - 240 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 208.0", "experiment debate - 464.0"]
      - ["experiment debate - 240.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 240 - 288 - 384 - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 240.0", "experiment debate - 384.0"]
      - ["experiment debate - 240.0", "experiment debate - 432.0"]
      - ["experiment debate - 288.0", "experiment debate - 384.0"]
      - ["experiment debate - 288.0", "experiment debate - 432.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 336 - 384 - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 336.0", "experiment debate - 384.0"]
      - ["experiment debate - 336.0", "experiment debate - 432.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 176 - 208 - 240:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 176.0", "experiment debate - 208.0"]
      - ["experiment debate - 176.0", "experiment debate - 240.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 288 - 336 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 288.0", "experiment debate - 464.0"]
      - ["experiment debate - 336.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 384 - 432 - 464:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 384.0", "experiment debate - 464.0"]
      - ["experiment debate - 432.0", "experiment debate - 464.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - cross-play - 0 - 336 - 432:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment debate - 0.0", "experiment debate - 336.0"]
      - ["experiment debate - 0.0", "experiment debate - 432.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
Debate_Judge_Replication_1:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: default_debate
  agents:
    debaters:
      - model_settings:
          model_type: offline
          alias: "16"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_22:45:10.795226
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "32"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_22:32:40.812217
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "48"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_14:40:51.686114
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "80"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_18:37:58.054098
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "112"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_18:47:14.278791
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "144"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_22:51:32.402362
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "208"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_22:57:09.217977
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "240"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_14:38:43.211600
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "288"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-17_03:02:53.139968
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "336"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_18:49:15.616790
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "384"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_18:42:37.904402
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "432"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_14:40:51.598517
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "464"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_14:38:45.493465
          require_quote_validation: False
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4-turbo
  tournament:
    tournament_type: self_play_only
  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Replication_2:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: default_debate
  agents:
    debaters:
      - model_settings:
          model_type: offline
          alias: "16"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_07:04:50.767612
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "32"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_03:06:39.645025
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "48"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-13_19:12:43.269491
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "80"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-13_23:10:35.974287
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "112"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_00:27:33.992099
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "144"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_04:09:13.679077
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "208"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_07:53:14.277578
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "240"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_22:57:09.217977
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "288"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_14:38:43.211600
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "336"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-17_03:02:53.139968
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "384"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-16_18:49:15.616790
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "432"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_04:15:52.249595
          require_quote_validation: False
      - model_settings:
          model_type: offline
          alias: "464"
          offline_file_path: /vast/spa9663/outputs/transcripts/2024-08-14_00:26:21.999328
          require_quote_validation: False
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4-turbo
  tournament:
    tournament_type: self_play_only
  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Cross_Play_Replication_One:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: default_debate
  agents:
    debaters: []
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4o
  tournament:
    tournament_type: replication
    replication_file_paths:
      - 2024-08-18_00:35:30.743648
      - 2024-08-18_03:50:19.285748
      - 2024-08-18_04:03:15.139027
      - 2024-08-18_04:10:15.287855
      - 2024-08-18_05:01:41.397113
      - 2024-08-18_05:17:32.375173
  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Cross_Play_Replication_Two:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: default_debate
  agents:
    debaters: []
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4o
  tournament:
    tournament_type: replication
    replication_file_paths:
      - 2024-08-18_05:59:45.089083
      - 2024-08-18_06:05:02.594962
      - 2024-08-18_06:44:01.771403
      - 2024-08-18_06:53:35.683818
      - 2024-08-18_07:55:31.682382
      - 2024-08-18_07:59:44.136515
  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Cross_Play_Replication_Three:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: default_debate
  agents:
    debaters: []
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4o
  tournament:
    tournament_type: replication
    replication_file_paths:
      - 2024-08-18_08:11:57.223798
      - 2024-08-18_08:49:03.335004
      - 2024-08-18_09:56:58.033679
      - 2024-08-18_10:48:29.957155
      - 2024-08-18_12:50:59.885730
      - 2024-08-18_14:53:31.768381
      - 2024-08-18_16:56:21.052136
  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Cross_Play_Replication_Four:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: default_debate
  agents:
    debaters: []
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4o
  tournament:
    tournament_type: replication
    replication_file_paths:
      - 2024-08-18_18:56:11.444701
      - 2024-08-18_20:43:48.170390
      - 2024-08-18_20:45:46.783511
      - 2024-08-18_20:50:38.975524
      - 2024-08-18_21:49:08.509794
      - 2024-08-18_22:47:03.286218

  dataset:
    dataset_type: quality
    split_type: val
Debate_Judge_Cross_Play_Replication_Five:
  batch_size: 1
  num_speeches: 2
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: default_debate
  agents:
    debaters: []
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        offline_file_path: gpt-4o
  tournament:
    tournament_type: replication
    replication_file_paths:
      - 2024-08-18_22:52:17.213042
      - 2024-08-19_00:53:12.871519
      - 2024-08-19_00:58:46.510449
      - 2024-08-19_01:54:48.453765
      - 2024-08-19_02:56:49.150891
      - 2024-08-19_14:26:07.244137
      - 2024-08-19_15:31:14.044814
      - 2024-08-21_00:08:32.687885
  dataset:
    dataset_type: quality
    split_type: val
val - alternate loss functions - first part:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "SFT"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateBinary-test
          alias: "Binary"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogit-test
          alias: "Logit"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogProb-test
          alias: "LogProb"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-FullTrainDebateRedux-test
          alias: "Prob"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["SFT", "Binary"]
      - ["SFT", "Logit"]
      - ["Binary", "Logit"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - alternate loss functions - second part:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "SFT"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateBinary-test
          alias: "Binary"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogit-test
          alias: "Logit"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogProb-test
          alias: "LogProb"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-FullTrainDebateRedux-test
          alias: "Prob"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["SFT", "LogProb"]
      - ["SFT", "Prob"]
      - ["Logit", "Prob"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - alternate loss functions - third part:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "SFT"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateBinary-test
          alias: "Binary"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogit-test
          alias: "Logit"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-805-FullTrainDebateLogProb-test
          alias: "LogProb"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-FullTrainDebateRedux-test
          alias: "Prob"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["Binary", "LogProb"]
      - ["Binary", "Prob"]
      - ["Logit", "LogProb"]
      - ["LogProb", "Prob"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - experiment debate - 1T - SFT:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one turn - SFT"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 16:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one turn - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 32:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one turn - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 48:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one turn - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 64:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one turn - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 80:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one turn - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 96:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one turn - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 112:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one turn - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 128:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one turn - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment debate - 1T - 144:
  batch_size: 1
  num_speeches: 1
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one turn - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - SFT:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-consultant
          alias: "experiment one turn consultant - SFT"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 16:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-16
          alias: "experiment one turn consultant - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 32:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-32
          alias: "experiment one turn consultant - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 48:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-48
          alias: "experiment one turn consultant - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 64:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-64
          alias: "experiment one turn consultant - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 80:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-80
          alias: "experiment one turn consultant - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 96:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-96
          alias: "experiment one turn consultant - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 112:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-112
          alias: "experiment one turn consultant - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 128:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal/checkpoint-128
          alias: "experiment one turn consultant - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultancy - 1T - 144:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-905-ConsultantSingleTrainFullFinal
          alias: "experiment one turn consultant - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - one-turn - cross-play - 0 - 16 - 32:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 16.0"]
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 32.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 32.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 0 - 16 - 48 - 64:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 48.0"]
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 64.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 48.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 64.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 0 - 16 - 80 - 96:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 80.0"]
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 96.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 80.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 96.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 0 - 16 - 112 - 128:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 128.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 128.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 0 - 16 - 32 - 144:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 0.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 16.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 32 - 48 - 64:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 48.0"]
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 64.0"]
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 64.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 32 - 48 - 80 - 96:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 80.0"]
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 96.0"]
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 80.0"]
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 96.0"]
  dataset:
    dataset_type: quality
    split_type: val
val - one-turn - cross-play - 32 - 48 - 112 - 128:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 32.0", "experiment one-turn debate - 128.0"]
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 128.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 64 - 80 - 96:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 64.0", "experiment one-turn debate - 80.0"]
      - ["experiment one-turn debate - 64.0", "experiment one-turn debate - 96.0"]
      - ["experiment one-turn debate - 80.0", "experiment one-turn debate - 96.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 64 - 80 - 112 - 128:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 64.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 64.0", "experiment one-turn debate - 128.0"]
      - ["experiment one-turn debate - 80.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 80.0", "experiment one-turn debate - 128.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 48 - 64 - 80 - 144:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 48.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 64.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 80.0", "experiment one-turn debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 96 - 112 - 128:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 96.0", "experiment one-turn debate - 112.0"]
      - ["experiment one-turn debate - 96.0", "experiment one-turn debate - 128.0"]
      - ["experiment one-turn debate - 112.0", "experiment one-turn debate - 128.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - one-turn - cross-play - 96 - 112 - 128 - 144:
  batch_size: 1
  num_speeches: 1
  flip: True
  enable_self_debate: False
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment one-turn debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-16
          alias: "experiment one-turn debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-32
          alias: "experiment one-turn debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-48
          alias: "experiment one-turn debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-64
          alias: "experiment one-turn debate - 64.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-80
          alias: "experiment one-turn debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-96
          alias: "experiment one-turn debate - 96.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-112
          alias: "experiment one-turn debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate/checkpoint-128
          alias: "experiment one-turn debate - 128.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/trained_models/llama-3-DPO-904-OneTurnDebate
          alias: "experiment one-turn debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  tournament:
    tournament_type: capped_round_robin
    custom_matchups:
      - ["experiment one-turn debate - 96.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 112.0", "experiment one-turn debate - 144.0"]
      - ["experiment one-turn debate - 128.0", "experiment one-turn debate - 144.0"]
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - experiment debate - untrained:
  batch_size: 1
  num_speeches: 2
  flip: False
  enable_self_debate: True
  speech_structure: default_debate
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/base_models/llama3-8b-262k
          alias: "experiment debate - untrained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment consultant - untrained:
  batch_size: 1
  num_speeches: 2
  flip: True
  enable_self_debate: True
  speech_structure: default_consultancy
  alternate: False
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path: /vast/spa9663/models/base_models/llama3-8b-262k
          alias: "experiment consultancy - untrained"
          require_quote_validation: True
          generation_params:
            temperature: 0.5
    judge:
      model_settings:
        model_type: openai
        alias: openai-judge
        model_file_path: ft:gpt-4-0613:nyu-arg::90NW3Tbx
  dataset:
    dataset_type: quality
    split_type: val
val - experiment open debate - trained:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-open-debater
          alias: "experiment debate - trained"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - experiment open debate - trained - sft base:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-open-debater-sft-base
          alias: "experiment debate - trained - sft base"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - experiment open debate - base:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: False
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/base_models/llama3-8b-262k
          alias: "experiment debate - base"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
val - experiment open debate - 0.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-mega-merged
          alias: "experiment debate - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 16.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-16
          alias: "experiment debate - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 32.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-32
          alias: "experiment debate - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 48.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-48
          alias: "experiment debate - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 80.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-80
          alias: "experiment debate - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 112.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-112
          alias: "experiment debate - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 144.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-144
          alias: "experiment debate - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 176.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-176
          alias: "experiment debate - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 208.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-208
          alias: "experiment debate - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 240.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-240
          alias: "experiment debate - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 288.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-288
          alias: "experiment debate - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 336.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-336
          alias: "experiment debate - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 384.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-384
          alias: "experiment debate - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 432.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-432
          alias: "experiment debate - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open debate - 464.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-811-FullTrainDebateLowLR-test/checkpoint-464
          alias: "experiment debate - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-merged
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 0.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-mega-consultant
          alias: "experiment consultant - 0.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 16.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-16
          alias: "experiment consultant - 16.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 32.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-32
          alias: "experiment consultant - 32.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 48.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-48
          alias: "experiment consultant - 48.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 80.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-80
          alias: "experiment consultant - 80.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 112.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-112
          alias: "experiment consultant - 112.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 144.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-144
          alias: "experiment consultant - 144.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 176.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-176
          alias: "experiment consultant - 176.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 208.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-208
          alias: "experiment consultant - 208.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 240.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-240
          alias: "experiment consultant - 240.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 288.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-288
          alias: "experiment consultant - 288.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 336.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-336
          alias: "experiment consultant - 336.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 384.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-384
          alias: "experiment consultant - 384.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 432.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-432
          alias: "experiment consultant - 432.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
val - experiment open consultancy - 464.0:
  batch_size: 1
  num_speeches: 1
  flip: False
  alternate: False
  enable_self_debate: True
  speech_structure: open_debate
  agents:
    debaters:
      - model_settings:
          model_type: llama3
          model_file_path:  /vast/spa9663/models/trained_models/llama-3-DPO-808-ConsultantFullTrain/checkpoint-464
          alias: "experiment consultant - 464.0"
          require_quote_validation: True
          generation_params:
            temperature: 0.01
          peft_base_model: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    judge:
      model_settings:
        model_type: repetitive
        alias: repetitive-judge
  dataset:
    dataset_type: quality
    split_type: val
    shuffle_deterministically: True
  tournament:
    tournament_type: self_play_only
