# Root directories. Other entries in this file build their paths from
# dataset_root and project_root through ${...} interpolation.
dataset_root: /scratch
project_root: /home/ctrl-sim-dev
# Waymo data folders under dataset_root: one parent folder plus the train,
# valid and valid_interactive splits (going by the folder names).
nocturne_waymo_data_folder: ${dataset_root}/nocturne_waymo
nocturne_waymo_train_folder: ${nocturne_waymo_data_folder}/formatted_json_v2_no_tl_train
nocturne_waymo_val_folder: ${nocturne_waymo_data_folder}/formatted_json_v2_no_tl_valid
nocturne_waymo_val_interactive_folder: ${nocturne_waymo_data_folder}/formatted_json_v2_no_tl_valid_interactive
# Not referenced by any other key in this file; presumably read directly by
# the training code - TODO confirm.
lightning_log_root: /home/lightning_logs

# Settings used when collecting the offline RL dataset.
offline_rl:
  # One output folder per split, all under ${dataset_root}/offline_rl.
  output_data_folder_train: ${dataset_root}/offline_rl/train_march_new
  output_data_folder_val: ${dataset_root}/offline_rl/val_march_new
  output_data_folder_val_interactive: ${dataset_root}/offline_rl/val_interactive_march_new
  output_data_folder_test: ${dataset_root}/offline_rl/test_march_new
  mode: train
  # NOTE(review): chunk_idx / chunk_size presumably select which slice of
  # the input files a job processes - confirm against the collection script.
  chunk_idx: 0
  chunk_size: 25000
  visualize: false
  movie_dir: ${project_root}/videos

# Settings for the rl_waymo preprocessing job. Uses the same mode / chunk_idx /
# chunk_size keys (and the same values) as the offline_rl section.
preprocess_rl_waymo:
  mode: train
  # NOTE(review): presumably the index and size of the file chunk to process -
  # confirm against the preprocessing script.
  chunk_idx: 0
  chunk_size: 25000

datasets:
  # Dataset settings for the rl_waymo dataset.
  rl_waymo:
    dataset_path: ${dataset_root}/offline_rl
    preprocess: true
    preprocess_dir: ${dataset_root}/preprocess
    # following trajeglish (64 for larger model)
    train_context_length: 32
    num_agent_types: 5
    num_road_types: 8
    map_attr: 2
    k_attr: 7
    # the next three values follow trajeglish
    max_num_agents: 24
    agent_dist_threshold: 60.0
    map_dist_threshold: 100.0
    max_timestep: 90

    parked_car_velocity_threshold: 0.05

    # acceleration / steering limits
    max_accel: 10.0
    min_accel: -10.0
    max_steer: 0.7
    min_steer: -0.7

    # reward-related constants
    max_veh_veh_distance: 15.0
    dist_to_road_edge_scaling_factor: 15.0
    veh_veh_collision_rew_multiplier: 10.0
    veh_edge_collision_rew_multiplier: 10.0
    pos_goal_shaped_min: 0
    pos_goal_shaped_max: 0.2
    pos_target_achieved_rew_multiplier: 10.0

    moving_threshold: 0.05

    # discretization sizes for acceleration, steering and return-to-go
    accel_discretization: 20
    steer_discretization: 50
    rtg_discretization: 350

    # trajeglish uses 96 road polylines
    max_num_road_polylines: 200
    max_num_road_pts_per_polyline: 100
    road_polyline_stride:
      less_than_10: 1
      less_than_20: 2
      less_than_50: 5
      at_least_50: 10

    use_veh_edge_rtg: true

    decision_transformer: false

    # min / max return-to-go per component
    min_rtg_pos: 0
    max_rtg_pos: 10
    min_rtg_yaw: 0
    max_rtg_yaw: 110
    min_rtg_vel: 0
    max_rtg_vel: 110
    min_rtg_veh: -10
    # alternative value noted by the author: 100
    max_rtg_veh: 90
    min_rtg_road: -10
    # alternative value noted by the author: 350
    max_rtg_road: 90

    # simulated-data settings
    preprocess_simulated_data: false
    simulated_dataset: ${dataset_root}/simulated_offline_rl_data_config300
    simulated_dataset_preprocessed_dir: ${dataset_root}/simulated_preprocessed_data_config300
    replay_ratio: 0.5
    center_on_focal_agent: true
    supervise_focal_agent: true

    # goal handling and reward-shaping toggles
    goal_fix: true
    goal_dim: 5
    remove_shaped_goal: true
    remove_shaped_veh_reward: false
    remove_shaped_edge_reward: false
    only_shaped_goal: false
    only_shaped_veh_reward: false
    only_shaped_edge_reward: false
  
  # Dataset settings for the diffusion variant of the rl_waymo dataset.
  rl_waymo_diffusion:
    dataset_path: ${dataset_root}/offline_rl
    preprocess: true
    preprocess_dir: ${dataset_root}/preprocess
    # following trajeglish (64 for larger model)
    train_context_length: 32
    input_horizon: 10
    num_agent_types: 5
    num_road_types: 8
    map_attr: 2
    k_attr: 7
    action_dim: 2
    # the next three values follow trajeglish
    max_num_agents: 24
    agent_dist_threshold: 60.0
    map_dist_threshold: 100.0
    max_timestep: 90

    parked_car_velocity_threshold: 0.05

    # acceleration / steering limits
    max_accel: 10.0
    min_accel: -10.0
    max_steer: 0.7
    min_steer: -0.7

    # divisors for position and velocity
    state_normalizer:
      pos_div: 100.0
      vel_div: 40.0

    # reward-related constants
    max_veh_veh_distance: 15.0
    dist_to_road_edge_scaling_factor: 15.0
    veh_veh_collision_rew_multiplier: 10.0
    veh_edge_collision_rew_multiplier: 10.0
    pos_goal_shaped_min: 0
    pos_goal_shaped_max: 0.2
    pos_target_achieved_rew_multiplier: 10.0

    moving_threshold: 0.05

    # discretization sizes for acceleration, steering and return-to-go
    accel_discretization: 20
    steer_discretization: 50
    rtg_discretization: 350

    # trajeglish uses 96 road polylines
    max_num_road_polylines: 100
    max_num_road_pts_per_polyline: 100
    road_polyline_stride:
      less_than_10: 1
      less_than_20: 2
      less_than_50: 5
      at_least_50: 10

    use_veh_edge_rtg: true

    decision_transformer: false

    # min / max return-to-go per component
    min_rtg_pos: 0
    max_rtg_pos: 10
    min_rtg_yaw: 0
    max_rtg_yaw: 110
    min_rtg_vel: 0
    max_rtg_vel: 110
    min_rtg_veh: -10
    # alternative value noted by the author: 100
    max_rtg_veh: 90
    min_rtg_road: -10
    # alternative value noted by the author: 350
    max_rtg_road: 90
    num_reward_components: 3

    # simulated-data settings
    preprocess_simulated_data: false
    simulated_dataset: ${dataset_root}/simulated_offline_rl_data_config300
    simulated_dataset_preprocessed_dir: ${dataset_root}/simulated_preprocessed_data_config300
    replay_ratio: 0.9
    center_on_focal_agent: false

    # goal handling and reward-shaping toggles
    goal_fix: true
    goal_dim: 5
    remove_shaped_goal: true
    remove_shaped_veh_reward: false
    remove_shaped_edge_reward: false
    only_shaped_goal: false
    only_shaped_veh_reward: false
    only_shaped_edge_reward: false

    future_relative_encoding: false

# Checkpoint and sampling settings for the adversary.
adversary:
  model_path: /home/wandb/config342/model_finetuning.ckpt
  # DT only
  real_time_rewards: false
  # Shared by CtRL-Sim and DT
  goal_tilt: 0
  veh_veh_tilt: -10
  veh_edge_tilt: 0
  action_temperature: 1.0
  nucleus_sampling: false
  nucleus_threshold: 0.8
  tilt_ego: false

# Checkpoint and sampling settings for the planner.
planner:
  model_path: /home/wandb/config311/model.ckpt
  goal_tilt: 10
  veh_veh_tilt: 10
  veh_edge_tilt: 10
  action_temperature: 1.0
  nucleus_sampling: false
  nucleus_threshold: 0.8

# Settings for evaluating a planner against an adversary.
planner_eval:
  # One of: log_replay, ctrl_sim_negative, ctrl_sim, dt, cat, cat_ctrl_sim
  adversary: ctrl_sim_negative
  # Log replay, ctrl_sim planner, or pdm_closed planner
  planner: ctrl_sim_planner
  history_steps: 10
  verbose: true
  seed: 0
  visualize: false
  num_files_to_evaluate: 1000

# Settings for model evaluation.
eval:
  model_path: /home/wandb/config311/model.ckpt
  movie_path: ${project_root}/movies
  visualize: false
  history_steps: 10
  veh_veh_tilt: 0
  veh_edge_tilt: 0
  goal_tilt: 0
  action_temperature: 1.5
  interesting_traj_len_threshold: 60
  interesting_goal_dist_threshold: 10
  interesting_timestep_diff_threshold: 20
  nucleus_sampling: false
  nucleus_threshold: 0.8
  multi_agent_eval_threshold: 8
  num_files_to_evaluate: 1000
  # One of: one_agent, two_agent, multi_agent
  eval_mode: one_agent
  verbose: true
  seed: 1
  real_time_rewards: false
  privileged_return: false
  # TODO: average return values not computed
  average_return: false
  max_return: false
  min_return: false

# Evaluation settings specific to the diffusion model.
eval_diffusion:
  use_eval_diffusion_steps: false
  n_eval_diffusion_step: 50
  # Original note: "10 Hz".
  # NOTE(review): how the value 2 maps to 10 Hz is not visible here - confirm.
  sampling_frequency: 2
  # The evaluation is assumed to be split into four partitions: [0, 1, 2, 3]
  partition: 0
  use_guidance: false
  n_guidance_steps: 2
  guide_scale: 0.1

# Settings for collecting simulated data; results are written to output_dir
# under dataset_root.
collect_sim:
  num_files_to_collect: 200
  # NOTE(review): presumably the index of the first file to collect from -
  # confirm against the collection script.
  start_idx: 1006
  # veh_veh or veh_edge
  tilt_type: veh_veh
  # NOTE(review): max_tilt / min_tilt look like the bounds of the tilt range
  # used for tilt_type - confirm.
  max_tilt: 0
  min_tilt: -50
  seed: 42
  output_dir: ${dataset_root}/robust_offline_rl_data

# Optimisation schedule for the fine-tuning stage.
train_finetuning:
  # floor(100000 / 64) * 20 = 31240; use 11160 for CAT finetuning
  max_steps: 31240
  warmup_steps: 500
  # Written with an explicit decimal point: a bare `5e-4` is a float under
  # OmegaConf but is read as a *string* by plain YAML 1.1 loaders (PyYAML).
  lr: 5.0e-4

# Settings for the main training run: trainer options, datamodule and model.
train:
  seed: 0
  accelerator: auto
  devices: 4 # set to 4 when multi-gpu training
  max_steps: 200000
  warmup_steps: 500 # marked NOT IMPLEMENTED by the original author
  # lr and weight_decay carry an explicit decimal point: a bare `5e-4` is a
  # float under OmegaConf but is read as a *string* by plain YAML 1.1 loaders
  # (PyYAML).
  lr: 5.0e-4
  weight_decay: 1.0e-4
  check_val_every_n_epoch: 1
  # bf16-mixed for bfloat16 mixed precision or 32-true for regular
  precision: 32-true
  limit_train_batches: 1.0
  gradient_clip_val: 10.0
  run_name: test
  track: false

  datamodule:
    train_batch_size: 16
    val_batch_size: 16
    num_workers: 6
    # NOTE(review): a stray list of values (1., 0.5, 0.1) was left here
    # without the key it belonged to.
    pin_memory: true

  model:
    hidden_dim: 256 # try 32, 64, 256, 512
    # NOTE(review): datasets.rl_waymo.map_attr is 2 while this is 3 - confirm
    # the difference is intentional.
    map_attr: 3
    num_road_types: 8
    num_reward_components: 3
    num_transformer_encoder_layers: 2
    num_decoder_layers: 4
    no_actions: false
    num_heads: 8
    dim_feedforward: 1024
    dropout: 0.1
    state_dim: 12
    predict_rtg: true
    use_map: true
    goal_dropout: 0.1
    max_pool_map: true
    encode_initial_state: true
    predict_future_states: true
    loss_action_coef: 1.0
    local_frame_predictions: false
    supervise_moving: true
    trajeglish: false
    il: false
    attend_own_return_action: false

# Settings for training the diffusion model: trainer options, datamodule and
# model.
train_diffusion:
  seed: 0
  accelerator: auto
  devices: 4 # set to 4 when multi-gpu training
  max_steps: 200000
  warmup_steps: 500 # marked NOT IMPLEMENTED by the original author
  # lr and weight_decay carry an explicit decimal point: a bare `2e-4` is a
  # float under OmegaConf but is read as a *string* by plain YAML 1.1 loaders
  # (PyYAML).
  lr: 2.0e-4
  weight_decay: 1.0e-4
  check_val_every_n_epoch: 1
  # bf16-mixed for bfloat16 mixed precision or 32-true for regular
  precision: 32-true
  limit_train_batches: 1.0
  limit_val_batches: 0.1
  gradient_clip_val: 10.0
  run_name: test
  track: false
  loss_type: l2
  gradient_accumulate_every: 2
  ema_decay: 0.995

  datamodule:
    train_batch_size: 16
    val_batch_size: 16
    num_workers: 6
    # NOTE(review): a stray list of values (1., 0.5, 0.1) was left here
    # without the key it belonged to.
    pin_memory: true

  model:
    hidden_dim: 256 # try 32, 64, 256, 512
    num_road_types: 8
    num_reward_components: 3
    num_transformer_encoder_layers: 2
    no_actions: false
    num_heads: 8
    dim_feedforward: 1024
    dropout: 0.1
    use_rtg: false
    predict_rtg: false
    use_map: true
    goal_dropout: 0.1
    max_pool_map: true
    encode_initial_state: true
    # One of: actions_only, states_only, states_actions
    diffusion_type: states_actions
    loss_action_coef: 1.0
    n_diffusion_steps: 100
    action_weight: 10
    loss_discount: 1
    predict_epsilon: false
    returns_condition: true
    condition_dropout: 0.25
    condition_guidance_w: 1.2
    test_ret: 0.9
    supervise_moving: true

# Settings for the nocturne simulator.
nocturne:
  collision_fix: true

  steps: 90
  dt: 0.1
  history_steps: 10

  # Passed to the nocturne Simulation object.
  scenario:
    # Initial timestep of the scenario (timesteps range from 0 to 90).
    start_time: 0
    # When true, non-vehicle objects (e.g. cyclists, pedestrians) are spawned.
    allow_non_vehicles: false
    # Conditions for an object to be included in moving_objects:
    moving_threshold: 0.2  # goal must be at least this far from its initial position
    speed_threshold: 0.05  # speed must exceed this value at some point
    # Maximum number of each kind of object visible in the object state.
    # With more objects than the cap, the closest ones are prioritized;
    # with fewer, the feature vector is padded with zeros.
    max_visible_objects: 16
    max_visible_road_points: 1000
    max_visible_traffic_lights: 20
    max_visible_stop_signs: 4
    # Keep every n-th road point of each polyline.
    sample_every_n: 1
    # When true, all road edges (the edges you can collide with) are added to
    # the visible road points first; other points (road lines, lane lines,
    # etc.) are only added if there are remaining slots afterwards.
    road_edge_first: false

  rew_cfg:
    # Agents get the collective reward instead of individual rewards.
    shared_reward: false
    goal_tolerance: 0.5
    # Rescale all rewards by this value; can help with some learning algorithms.
    reward_scaling: 1.0
    collision_penalty: 0
    shaped_goal_distance_scaling: 0.2
    shaped_goal_distance: true
    # If shaped_goal_distance is true and this is true, the goal distance is a
    # penalty for being far from the goal instead of a reward for being close.
    goal_distance_penalty: false
    # NOTE(review): no top-level `episode_length` key is defined in this file.
    # Unless another config or a command-line override supplies it, resolving
    # this interpolation will fail - confirm.
    goal_achieved_bonus: ${episode_length}
    # Goal is only achieved if within this tolerance on distance from the goal.
    position_target: true
    position_target_tolerance: 1.0
    # Goal is only achieved if within this tolerance on final agent speed at
    # the goal position.
    speed_target: true
    speed_target_tolerance: 1.0
    # Goal is only achieved if within this tolerance on final agent heading at
    # the goal position.
    heading_target: true
    heading_target_tolerance: 0.3

# Hydra run directory: each run writes under ${project_root}/slurm_logs,
# nested by date (%Y.%m.%d), then time (%H.%M.%S), then the job's
# override_dirname.
hydra:
  run:
    dir: ${project_root}/slurm_logs/${now:%Y.%m.%d}/${now:%H.%M.%S}/${hydra.job.override_dirname}