# Evaluation Configuration
# This file contains the default parameters for the EvalConfig class

# Model and experiment identifiers
# List of experience names, e.g., ["apibench", "mllm"] or ["apibench"].
experience_names:
  - "apibench"
  - "mllm"
  - "hugging-bench-1"
  - "hugging-bench-2"

# ORDERED list of LoRA adapters to use, one per experience, or null.
# For "rat-frozen" experiments (using the same adapter, frozen after the first
# experience), repeat the same adapter path once for each experience, e.g.,
# ["apibench-only-hf/checkpoint-1040", "apibench-only-hf/checkpoint-1040", "apibench-only-hf/checkpoint-1040"].
# For "joint training", also repeat the same adapter path multiple times.
# SET THIS TO NULL if "retrievers_only" mode is used.
lora_adapters:
  - "hugging-bench-2-joint-training/checkpoint-1610"
  - "hugging-bench-2-joint-training/checkpoint-1610"
  - "hugging-bench-2-joint-training/checkpoint-1610"
  - "hugging-bench-2-joint-training/checkpoint-1610"

# Variant name for the output directory.
variant_name: "joint-training"

# List of retrievers, if needed, e.g., ["bm25", "sentence_transformer"],
# ["splade"], or null.
retrievers: null

# If using retrievers, specify an ORDERED list of the model index to use on
# each experience, e.g., ["e1", "e1_e2"].
# Choices: "e1", "e1_e2", "e1_e2_e3", "e1_e2_e3_e4", or null.
model_indices: null

# Base model to use, e.g., "huggyllama/llama-7b" or
# "deepseek-ai/deepseek-coder-7b-instruct-v1.5".
repo_id: "huggyllama/llama-7b"

# Input/Output settings
input_max_length: 256
max_new_tokens: 128
# Lower temperature for more conservative predictions.
temperature: 0.35
do_sample: true

# Evaluation settings
# Suggested batch size: 32 if no retriever, 8 with retriever.
# NOTE(review): the current value (38) matches neither suggestion while
# `retrievers` is null in this file — confirm it is intentional.
eval_batch_size: 38
top_k: 10
top_p: 0.7

# LoRA merging strategy settings
# One of: ties, dare_linear, arithmetic_mean, or null.
lora_merging_strategy: null
# Use only when lora_merging_strategy is "ties" or "dare_linear".
weights: [1.0, 1.0, 1.0, 1.0]
# NOTE(review): presumably a merging parameter as well — confirm which
# strategies actually read it.
density: 0.3


# Whether to evaluate on the training data.
eval_on_train: false