# Defaults applied to all methods with NL feedback.
# Booleans use canonical lowercase `true`/`false`, so the parsed values do not
# depend on loader-specific handling of `True`/`False`.
feedback_type: "nl_qa"
summarize_feedback: true
pairwise_model_type: "vanilla"

# Allowed values: [x, y, none]. Defines which kind of model (X->U or Y->U) is
# used for selecting the pairs for labeling.
pairwise_pref_model_input_type: "x"
feedback_acquisition_method: "none"

# Whether to include the seeding message at the beginning of the conversation.
include_goals: true

# Allowed values: [random, llm].
init_method: "random"
use_prior_knowledge: false

# The number K of pairs to be labeled by the LLM.
n_llm_pair_labels: 64

# The chunk size S: how many pairs are labeled by the LLM at once.
chunk_size_llm_pair_labels: 8

# Number of LLM labels generated for each outcome pair.
num_llm_samples: 1

# LLM model for the LILO agent.
uprox_llm_model: "llama3.3-70b-instruct"

# LLM model for the DM simulator agent.
hf_llm_model: "llama3.3-70b-instruct"
