# Stub debater entry: model_name and output_dir are both "stub_model".
# NOTE(review): presumably a local smoke-test configuration - confirm.
Test:
    model_name: stub_model
    target: debater
    max_length: 4096
    requires_token: true
    opening_speeches_only: false
    llm_type: llama3
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_8bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: stub_model
    # Two sources: a consultancy transcript file plus a Hugging Face dataset id.
    dataset:
        - dataset_type: quality_consultancy
          # NOTE(review): user-specific absolute path; it will not resolve on
          # other machines - confirm before relying on this entry.
          full_dataset_file_path: /Users/samarnesen/Downloads/human_and_gpt4_debates_and_consultancies.jsonl  # human_and_gpt4_debates.jsonl
        - dataset_type: external_huggingface
          full_dataset_file_path: yahma/alpaca-cleaned
    speech_structure:
        - default_consultancy
# Stub judge entry: model_name and output_dir are both "stub_model", with
# llm_type openai and opening speeches only.
# NOTE(review): presumably a local smoke-test configuration - confirm.
Test - Judge:
    model_name: stub_model
    target: judge
    max_length: 4096
    requires_token: true
    opening_speeches_only: true
    llm_type: openai
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_8bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: stub_model
    # No file path or split is given here, unlike several other entries.
    dataset:
        - dataset_type: quality
    speech_structure:
        - empty_round
# Stub entry using the open_debate speech structure with a judge target;
# a single epoch over the "quality" dataset's train split.
# NOTE(review): presumably a local smoke-test configuration - confirm.
Test - Open:
    model_name: stub_model
    target: judge
    max_length: 4096
    requires_token: true
    opening_speeches_only: false
    llm_type: llama3
    training_hyperparameters:
        num_train_epochs: 1
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_8bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
        supplemental:
            use_minimal_output_format: true
    logging_and_saving_config:
        logging_steps: 10
        output_dir: stub_model
    dataset:
        - dataset_type: quality
          split_type: train
          flip_sides: true
    speech_structure:
        - open_debate
# Judge training entry on the llama3-8b-262k base model: 3 epochs with LoRA
# over the "quality" train split, using the open_debate speech structure.
Train - Llama3 - Judge:
    model_name: /vast/spa9663/models/base_models/llama3-8b-262k
    target: judge
    max_length: 32986
    requires_token: true
    opening_speeches_only: false
    llm_type: llama3
    training_hyperparameters:
        num_train_epochs: 3
        per_device_train_batch_size: 2
        gradient_accumulation_steps: 8
        optim: adamw_hf
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-5` as the string "2e-5" rather than as a float.
        learning_rate: 2.0e-5
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
        supplemental:
            use_minimal_output_format: true
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/llama-3-judge
        merge_output_dir: /vast/spa9663/models/trained_models/llama-3-judge
    dataset:
        - dataset_type: quality
          split_type: train
          flip_sides: true
    speech_structure:
        - open_debate
# One-epoch LoRA run starting from the llama-3-mega-merged checkpoint, over
# the "quality" train split with the open_debate speech structure.
# NOTE(review): the entry name says "Open Debater" but target is judge -
# confirm this is intended.
Train - Llama3 - Open Debater - SFT Base:
    model_name: /vast/spa9663/models/trained_models/llama-3-mega-merged
    target: judge
    max_length: 32986
    requires_token: true
    opening_speeches_only: false
    llm_type: llama3
    training_hyperparameters:
        num_train_epochs: 1
        per_device_train_batch_size: 2
        gradient_accumulation_steps: 8
        optim: adamw_hf
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `1e-5` as the string "1e-5" rather than as a float.
        learning_rate: 1.0e-5
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
        supplemental:
            use_minimal_output_format: true
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/llama-3-open-debater-sft-base
        merge_output_dir: /vast/spa9663/models/trained_models/llama-3-open-debater-sft-base
    dataset:
        - dataset_type: quality
          split_type: train
          flip_sides: true
    speech_structure:
        - open_debate
# Debater training entry on Llama-3-8B-Instruct-262k: 2 epochs with LoRA over
# a combined debater transcript file plus the yahma/alpaca-cleaned dataset.
# The adapter and the merged model are written to separate directories.
Train - Llama3 - Human and GPT:
    model_name: /home/ubuntu/mars-arnesen-gh/{anonymised}/downloaded-models/gradientai/Llama-3-8B-Instruct-262k
    llm_type: llama3
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 2
        per_device_train_batch_size: 2
        gradient_accumulation_steps: 8
        optim: adamw_hf
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega
        merge_output_dir: /home/ubuntu/mars-arnesen-gh/{anonymised}/models/trained_models/llama-3-mega-merged
    dataset:
        - dataset_type: quality_debates
          full_dataset_file_path: /home/ubuntu/mars-arnesen-gh/{anonymised}/sft_data/debater/debater_combined.jsonl
        - dataset_type: external_huggingface
          full_dataset_file_path: yahma/alpaca-cleaned
# Debater training entry on the llama3-8b-262k base model: 2 epochs with LoRA
# over a debate transcript file plus the yahma/alpaca-cleaned dataset.
Train - Llama3 - Human and GPT - Passage - 2:
    model_name: /vast/spa9663/models/base_models/llama3-8b-262k
    llm_type: llama3
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 2
        per_device_train_batch_size: 2
        gradient_accumulation_steps: 8
        optim: adamw_hf
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/llama-3-passage-2
        merge_output_dir: /vast/spa9663/models/trained_models/llama-3-passage-2
    dataset:
        - dataset_type: quality_debates
          full_dataset_file_path: /home/spa9663/debate-data/human_and_gpt4_debates.jsonl
        - dataset_type: external_huggingface
          full_dataset_file_path: yahma/alpaca-cleaned
# Debater training entry on Yukang/LongAlpaca-13B (llm_type llama): 4 epochs
# with LoRA. No max_length is set in this entry.
Train - 13B - Alpaca:
    model_name: Yukang/LongAlpaca-13B
    target: debater
    opening_speeches_only: false
    llm_type: llama
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: /vast/spa9663/models/trained_models/Llama-2-13B-32K-Neft-4/
        merge_output_dir: /vast/spa9663/models/trained_models/Llama-2-13B-32K-Merged-Neft-4/
    # Written as a one-item list so the shape matches the list form used by
    # the other entries in this file.
    dataset:
        - dataset_type: quality_debates
# Debater training entry on the mixtral-8x7b base model (llm_type mistral):
# 4 epochs with LoRA. The adapter and the merged model are written to
# separate directories.
Train - Mixtral - Human:
    model_name: /vast/spa9663/models/base_models/mixtral-8x7b
    llm_type: mistral
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: /vast/spa9663/models/trained_models/mixtral-8x7b
        merge_output_dir: /vast/spa9663/models/trained_models/mixtral-8x7b-merged
    # Written as a one-item list so the shape matches the list form used by
    # the other entries in this file.
    dataset:
        - dataset_type: quality_debates
# Debater training entry on the mixtral-8x7b base model (llm_type mistral):
# 3 epochs with LoRA over a single debate transcript file. The adapter and
# the merged model are written to separate directories.
Train - Mixtral - Human and GPT:
    model_name: /vast/spa9663/models/base_models/mixtral-8x7b
    llm_type: mistral
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 3
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: /vast/spa9663/models/trained_models/mixtral-mega-adapter
        merge_output_dir: /vast/spa9663/models/trained_models/mixtral-mega
    dataset:
        - dataset_type: quality_debates
          full_dataset_file_path: /home/spa9663/debate-data/mega_human_and_gpt4_debates.jsonl
# Consultancy training entry on the mixtral-8x7b base model (llm_type
# mistral): 4 epochs with LoRA, default_consultancy speech structure.
Train - Mixtral - Consultant - Human:
    model_name: /vast/spa9663/models/base_models/mixtral-8x7b
    llm_type: mistral
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter-human
        merge_output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter-human
    dataset:
        - dataset_type: quality_consultancy
    speech_structure:
        - default_consultancy
# Consultancy training entry: 4 epochs with LoRA, default_consultancy speech
# structure.
# NOTE(review): model_name is Yukang/LongAlpaca-13B while llm_type is mistral
# and the output paths say "mixtral"; the "Train - 13B - Alpaca" entry in this
# file pairs the same model with llm_type llama - confirm which is intended.
Train - Mixtral - Consultant - Llama:
    model_name: Yukang/LongAlpaca-13B
    llm_type: mistral
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 4
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter-llama
        merge_output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter-llama
    dataset:
        - dataset_type: quality_consultancy
    speech_structure:
        - default_consultancy
# Consultancy training entry on the mixtral-8x7b base model (llm_type
# mistral): 3 epochs with LoRA over a debates-and-consultancies transcript
# file, with merge_train_and_val enabled on the dataset.
Train - Mixtral - Consultant - Human and GPT:
    model_name: /vast/spa9663/models/base_models/mixtral-8x7b
    llm_type: mistral
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 3
        per_device_train_batch_size: 4
        gradient_accumulation_steps: 4
        optim: paged_adamw_32bit
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        # NOTE(review): output_dir and merge_output_dir are the same path
        # here, whereas some other entries in this file keep them separate -
        # confirm that sharing one directory is intended.
        output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter
        merge_output_dir: /vast/spa9663/models/trained_models/mixtral-consultant-adapter
    dataset:
        - dataset_type: quality_consultancy
          merge_train_and_val: true
          full_dataset_file_path: /home/spa9663/debate-data/human_and_gpt4_debates_and_consultancies.jsonl
    speech_structure:
        - default_consultancy
# Consultancy training entry on the llama3-8b-262k base model: 2 epochs with
# LoRA over a debates-and-consultancies transcript file plus the
# yahma/alpaca-cleaned dataset. The adapter and the merged model are written
# to separate directories.
Train - Llama3 - Consultant - Human and GPT:
    model_name: /vast/spa9663/models/base_models/llama3-8b-262k
    llm_type: llama3
    target: debater
    opening_speeches_only: false
    max_length: 32986
    training_hyperparameters:
        num_train_epochs: 2
        per_device_train_batch_size: 2
        gradient_accumulation_steps: 8
        optim: adamw_hf
        # Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve a
        # bare `2e-4` as the string "2e-4" rather than as a float.
        learning_rate: 2.0e-4
        max_grad_norm: 0.3
        warmup_ratio: 0.03
        lr_scheduler_type: constant
        peft_type: lora
    logging_and_saving_config:
        logging_steps: 10
        output_dir: /vast/spa9663/models/trained_models/llama-3-consultant
        merge_output_dir: /vast/spa9663/models/trained_models/llama-3-mega-consultant
    dataset:
        - dataset_type: quality_consultancy
          full_dataset_file_path: /home/spa9663/debate-data/human_and_gpt4_debates_and_consultancies.jsonl
        - dataset_type: external_huggingface
          full_dataset_file_path: yahma/alpaca-cleaned
    speech_structure:
        - default_consultancy
