# Hydra composition: this file itself is the only entry in the defaults list.
defaults:
  - _self_
# Hydra's run directory is set to the logdir key defined in this file.
hydra:
  run:
    dir: ${logdir}
# Dotted path of the evaluation entry point. How it is loaded depends on the
# launcher script, which is not part of this file.
_target_: agent.eval.eval_pick_place_reach.EvalPickPlaceReach

# Run name; embeds horizon_steps (ta) and denoising_steps (td).
name: pick_place_eval_full_ta${horizon_steps}_td${denoising_steps}
# Output directory: <root>/rlbench-eval/<name>/<date>_<time>_<seed>, where <root>
# is the DPPO_LOG_DIR environment variable, or the path after the comma when
# that variable is unset.
logdir: ${oc.env:DPPO_LOG_DIR,/scratch4/workspace/placeholder-hdp1/dppo}/rlbench-eval/${name}/${now:%Y-%m-%d}_${now:%H-%M-%S}_${seed}

# Trained model checkpoint (referenced by model.network_path).
# Keep exactly one base_policy_path line active at a time.
# NOTE(review): "CFG" presumably means classifier-free guidance (see
# model.cfg_guidance_scale) - confirm.
# Without CFG:
base_policy_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench-pretrain/pick_place_pre_para_diffusion_film_cp_z3_ta64_td20_ddim8_size512/2025-12-17_00-49-31_42/checkpoint/best_model.pt
# With CFG:
# base_policy_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench-pretrain/pick_place_pre_para_diffusion_film_cp_cfg_z3_ta64_td20_ddim8_size512/2025-12-17_00-49-31_42/checkpoint/best_model.pt

# Normalization for state/action
normalization_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/processed/normalization.npz

# REACH dataset paths
dataset_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/processed/train_reach_normalized.npz
metadata_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/processed/train_reach_metadata.npy
# NOTE(review): unlike the other data paths, this one lives under train/ rather
# than processed/ - presumably the metadata before the reach/carry split; confirm.
full_metadata_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/train/train_metadata.npy

# CARRY dataset paths
carry_dataset_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/processed/train_carry_normalized.npz
carry_metadata_path: /scratch4/workspace/placeholder-hdp1/dppo/rlbench_pick_place_data/stack_blocks/variation0/processed/train_carry_metadata.npy

# Evaluation settings
# Number of rollouts per control point (CP).
n_rollouts_per_cp: 10
# false = deterministic noise (same seed each rollout).
use_random_noise: false

# CP selection strategy: pick CPs that sweep one z-dimension while the
# remaining dimensions stay fixed.
# Options: "random", "vary_angle", "vary_dist", "vary_pos"
#
#   vary_angle: 5 CPs, angle     in [0.0, 0.2, 0.4, 0.6, 0.8]; dist & pos fixed
#   vary_dist:  5 CPs, dist_frac in [0.2, 0.4, 0.6, 0.8, 1.0]; angle & pos fixed
#   vary_pos:   3 CPs, pos_frac  in [0.35, 0.5, 0.65];         angle & dist fixed
#
cp_selection_strategy: vary_angle

# Values held constant for the two dimensions that are not being swept:
#   vary_angle -> fixed_dist_frac, fixed_pos_frac
#   vary_dist  -> fixed_angle,     fixed_pos_frac
#   vary_pos   -> fixed_angle,     fixed_dist_frac
fixed_angle: 0.4
fixed_dist_frac: 0.4
fixed_pos_frac: 0.5

# Seed (also used as the logdir suffix) and compute device
seed: 42
device: 'cuda:0'

# Model dimensions (must match training)
obs_dim: 22
action_dim: 8
# z is [angle, dist_frac, pos_frac], normalized
z_dim: 3
# target is the end_pos xyz
target_dim: 3

# Diffusion settings
denoising_steps: 20
horizon_steps: 64
use_ddim: true
# Raised from 8 for better quality; currently the full 20 (= denoising_steps).
# 16 is another value to try.
ddim_steps: 20

# Simulation settings
# Physics steps per action (data collection uses 5, can increase for eval)
action_repeat: 10

# Network architecture (must match training)
model:
  _target_: model.diffusion.parameterized_diffusion_eval.ParameterizedDiffusionEval
  # Checkpoint to load; resolves to the top-level base_policy_path.
  network_path: ${base_policy_path}
  # Values mirrored from the top-level keys of the same name.
  horizon_steps: ${horizon_steps}
  obs_dim: ${obs_dim}
  action_dim: ${action_dim}
  denoising_steps: ${denoising_steps}
  device: ${device}
  use_ddim: ${use_ddim}
  ddim_steps: ${ddim_steps}
  # Diffusion wrapper options
  predict_epsilon: true
  denoised_clip_value: 1.0
  randn_clip_value: 3
  # 1.0 = no guidance, >1 = amplified guidance.
  # NOTE(review): raising this is presumably only meaningful with the CFG-trained
  # checkpoint (see the base_policy_path alternatives) - confirm.
  cfg_guidance_scale: 1.0
  network:
    _target_: model.diffusion.parameterized_diffusion.FiLMDiffusionMLP
    time_dim: 16
    mlp_dims: [512, 512, 512]
    residual_style: true
    use_layernorm: true
    # cond_steps=1, so obs_dim * cond_steps = obs_dim
    cond_dim: ${obs_dim}
    horizon_steps: ${horizon_steps}
    action_dim: ${action_dim}
    z_dim: ${z_dim}
    target_dim: ${target_dim}
    z_mlp_dims: [16, 32]
    film_at_layers: all
