# Run setup: dataset name, compute device, environment id, and seed.
# NOTE(review): how each value is interpreted depends on the code that loads
# this file, which is not visible here — confirm against the loader.
dataset: medium-expert
device: cuda
env: metaworld_dial-turn-v2
seed: 0
# preference
# Settings for the preference-feedback stage.
# NOTE(review): the per-key notes below are inferred from key names only;
# confirm each against the code that reads this file.
feedback_num: 1000  # presumably the number of feedback labels/queries
data_quality: 3.0  # float; scale and meaning not visible here — TODO confirm
segment_size: 25  # presumably the length of each compared segment
normalize: true  # boolean
threshold: 0.5
# Plain scalar: this loads as the string "none", not as YAML null.
data_aug: none
q_budget: 1
feedback_type: RLT
model_type: BT  # possibly Bradley-Terry — TODO confirm
noise: 0.0
human: false  # boolean
# MLP
# Training hyperparameters for the MLP model(s).
# NOTE(review): key meanings are inferred from names; confirm against the
# training code that reads this file.
epochs: 300
batch_size: 512
activation: tanh
# Written with an explicit decimal point on purpose: YAML 1.1 loaders such as
# PyYAML resolve a bare `1e-3` to the string '1e-3' rather than a float.
# `1.0e-3` is the float 0.001 under both YAML 1.1 and YAML 1.2.
lr: 1.0e-3
hidden_sizes: 128  # a single integer here, despite the plural key name
ensemble_num: 3
ensemble_method: mean