# Configuration file for the OpenImage training experiment
exp_path: $MAIN_PATH/core

# Entry scripts of the executor and the aggregator, both located under $exp_path
executor_entry: executor.py
aggregator_entry: aggregator.py

# SSH user name and private-key path
auth:
  ssh_user: ''
  ssh_private_key: '~/.ssh/id_rsa'

# Commands to run, in the order listed, before Oort itself can be launched
setup_commands:
  - source $CONDA_PATH/etc/profile.d/conda.sh
  - conda activate $CONDA_ENV
  - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PATH/lib

# ========== Additional job configuration ==========
# Default parameters are specified in argParser.py, where each parameter is described in more detail

# Job parameters, written as a list of single-key mappings (one parameter per entry).
# Values of the form $NAME are placeholders; presumably they are substituted
# before this file is consumed -- TODO confirm against the launcher script.
job_conf:
  - log_path: $MAIN_PATH/core/evals  # Path of log files
  - job_name: openimage  # Generate logs under this folder: log_path/job_name/time_stamp
  - total_worker: $WORKERS  # Number of participants per round, we use K=10 in our paper, large K will be much slower
  - data_set: openImg  # Dataset: openImg, google_speech, stackoverflow
  - data_dir: $DATA_PATH  # Path of the dataset
  - data_map_dir: $DATA_PATH/client_data_mapping  # Allocation of data to each client, turn to iid setting if not provided
  - device_conf_file: $MAIN_PATH/dataset/data/device_info/client_device_capacity  # Path of the client trace
  - device_avail_file: $MAIN_PATH/dataset/data/device_info/client_behave_trace
  - model: shufflenet_v2_x2_0  # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2
  - sample_mode: $SAMPLER
  - gradient_policy: $AGGREGATOR  # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default
  - eval_interval: 10  # Run an evaluation on the testing set every this many rounds
  - epochs: $EPOCHS  # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
  - filter_less: 31  # Remove clients w/ fewer than this many samples
  - num_loaders: 1
  - yogi_eta: 3e-3
  - yogi_tau: 1e-8
  - local_steps: $LOCALSTEPS
  - learning_rate: 0.01
  - batch_size: 30
  - test_bsz: 30
  - malicious_factor: 0
  - use_wandb: 1
  - stale_update: $STALEUPDATES
  - deadline: $DEADLINE
  - target_ratio: $TARGETRATIO
  - last_worker: 0
  - random_behv: $RANDBEHV
  - avail_priority: $AVAILPRIO
  - avail_probability: $AVAILPROP
  - partitioning: $PARTITION
  - zipf_param: 1.95
  - filter_class_ratio: 0.1
  - total_clients: $CLIENTS
  # - used_samples: 50
  - exp_type: $EXPTYPE
  - overcommitment: $OVERCOMMIT
  - stale_factor: $STALE_FACTOR
  - train_ratio: 0.5
  - test_ratio: 0.5
  - send_delta: 0
  - model_boost: 0
  - stale_all: 0
  - adapt_selection: $ADAPT_SELECT
  - sample_seed: $SAMPLESEED
  - wandb_key: $WANDB_API_KEY
  - wandb_entity: $WANDB_ENTITY
  - wandb_tags: $TAGS
  - dropout_ratio: $DROPOUT_RATIO
  - scale_sys: $SCALE_SYS
  - scale_sys_percent: $SCALE_SYS_PERCENT
  - stale_beta: $STALE_BETA
  - data_map_file_characteristic_normalization: $DATA_PATH/client_data_mapping/train_client_characteristic_normalization.csv
  - data_map_file_characteristic: $DATA_PATH/client_data_mapping/train_client_characteristic.csv
  - client_num: 500
  - k_channel: $WORKERS  # Same $WORKERS placeholder as total_worker
  - max_client_data_size: 6709
  - min_client_data_size: 20
  - use_validity: True
  - use_characteristic: True
  - utility_type: $UTILITY_TYPE  # loss, loss_multiply_efficiency