# Hydra defaults list: each entry selects one option from a config group.
# `_self_` is listed last, so the keys defined in this file are merged after
# (and take precedence over) the group configs selected above it.
defaults:
  - optim: adam
  - datamodule: transfer_dm
  - trainer: basic_trainer
  - logger: wandb
  # NOTE(review): "defualt_cb" looks like a misspelling of "default_cb";
  # confirm the actual file name in the callbacks group before renaming.
  - callbacks: defualt_cb
  - _self_

# General run settings.
visionClassification: true
train_exp_id: null
# Validate and test before training.
validate_and_test: true
with_earlystopping: true
use_lastModels: false
seed: 42
# num_clients is intentionally not set here; it will be taken from the train cfg.

# FL hparams
fed_rounds: 100
fed_local_steps: 2
fed_all_clients: true
org_all_learners: null
# Decimal representing the percentage of the total clients to be selected.
# If left unset, no random selection will happen.
random_selection: null
debug: false
included_clients: null

# FL methods params
# Federated learning methods: only one of the method flags that follow
# should be set to true.
# NOTE(review): `method` itself is left unset; the flags below are top-level
# keys of this file, not children of `method`.
method: null
Fedavg: true
Fedprox: false
scaffold: false
moon: false
moon_model: moon_cnn
mu: 0  # for prox term
T: 1
scaffold_global_lr: 1
weighted_avg_for_fedavg: true
# Do the training without checkpoints and validation after every epoch.
quick_fed: false
FT_version: false
# E-FL hparams
fed_max_local_E: 2
# How many global rounds of history to keep (try 5).
history_tracking: 3
# Send the halfway signal (in the middle of the current E) every this many
# global rounds (try 7).
halfway_signal: 5
hist_threshold: 0.02  # was 0
signal_threshold: 0.015  # was 0.2
currentE: null
defaultE: 5
track_round: false
double_check: false

# CC hparams
with_swa: false
with_pruning: false
which_pruning: l1_unstructured
pruning_amount: 0.5
CC_rounds: 3
make_prun_permanent: false
unweighted_phase1: false
with_ewa: false
# For the EWA CC method, and for CC plan2 for the checkpoint_rounds.
Es: [1, 2, 5, 15]
fed_per_class_acc: null

# loading hparams
# For the adaptive FedAvg, temp static strategy.
# NOTE(review): the key is spelled "trasnsition_round"; it is kept as-is
# because renaming it would require updating whatever reads it.
trasnsition_round: 15
special_load: false
# For the continued runs, to run extra rounds.
pathdate: null
start_load_from_round: 99
extra: null

######

# data and losses hparams
teacher_client: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # TODO: ???
learner_client: 4
data_kd: false
my_kd: false
goal_class: null
goal_classes: null
custom_average: false
no_DW: false
# FedDF, with setting no_DW=True.
avg_logits_loss: false
# Modified FedDF: uses the DW for the logits (instead of for the KL-div loss,
# like MyKD).
avg_weighted_logits_loss: false
use_my_weighted_avg: false
not_multiply_T: false
# Default is None; set to an int representing the number of teachers.
multiple_teachers: null
KL_temperature: 1
# Also referred to as alpha; -1 means it will be calculated automatically.
KL_loss_strength: -1
random_alpha: false
weight_after_best_T: false
T_after_best_weight: false
use_pub_data: false
cal_acc: false
with_CE: true
with_learner_alpha: true
starting_student: 'best'
same_initial: false  # for the KD exps
# For the query in QKT (-1 for multi and random number of classes).
num_classes_to_select: 1
teacher_candidates: null
query_goal_thresholds: null
qkt_multi_teachers: true
qkt_unweighted_teachers: false
max_classes_fraction: -1
queries: null
no_alpha: false
step1_T: 1
step1_ft_and_freeze: false
lora_ft: false
lora_combined_training: false
lora_combined_infer: false
with_EWC: false
with_EWC_fc: false
ewc_lambda: 0.4
# The default for CLUE is helper_model_replace.
clue_helper_model_avg: false
clue_z_percent: 0.2
with_softMask: false
# For the soft_mask approach; could also be "teacher_data" or "student_data".
mask_loss_option: 'combined_loss'
num_significant_layers: 0
soft_mask_value: 0.1
use_all_samples: true
num_classes: 10
noise_threshold: 0.02
use_number_of_samples: false

use_all_teachers: true
filter_with_noise: false
alpha_data_free: false
normalize_alpha: true  # for data_free alpha
teacher_sample_threshold: 0
goal_class_boost: 2
# When we filter the teacher.
copy_of_self_as_teacher: false
# By default, goal classes and learner classes are used for the alpha mask.
only_goal_classes: false
# By default, use the results from the train run.
measure_pre_transfer_acc: false

with_softMask_bn: false
with_softMask_channel: false
# Originally annotated "%".
# NOTE(review): confirm whether 0.10 is a fraction (10%) or a literal percent.
top_Z_percent: 0.10
masking_type: 'soft'
grad_norm_type: 'grad'
mask_fc: false
debug_mask: false
# For OOD: 'ash_p', 'ash_b' or 'ash_s'.
ash_method: null
with_odin: false  # another OOD approach
hard_mask: false
hard_mask_all_fc: false
detailed_testing: false
test_description: null
freeze_backbone: false
two_stage_qkt: false
two_stage_starting_point: 'qkt'
restore_important_params_after_training: false
avg_important_params_after_training: false
use_stage2_pytorch: false
num_epochs: 10
stage2: false
stage1_epochs: null
# If not set, the same number of epochs as stage1 is used.
# Can be set during the training.
stage2_epochs: null
stage1_T: null
# If true, besides masking irrelevant classes, for each teacher the relevant
# classes are considered only if the teacher has them.
mask_based_on_binary_weight: false
no_masking: false
no_masking_no_compute: false
no_head_replacement: false
centralized_qkt: false
centralized_qkt_use_client_data: false
volunteer_client_id: 0
data_free_option: null
personalized_qkt: false
predefined_queries: null
qkt_save_models: false
###
use_val_data: false  # for the dataPart exp
# The second train run, based on another old dist.; needed to download the
# models.
train_exp_id2: null
other_alpha: 0
# Decimal portion of the validation set to use (e.g. 0.1 is 10%).
# A value of 1 uses the whole validation set.
sample_portion_from_val_set: 1
same_val: false
sampled_ind: null

###
# other hparams
rl_t: true
is_bl: false
b: 1.1
is_c100: false
qfilename: null
Tgoal: 2
dafl: false
# The keys below are left unset in this file.
qtable: null
swapV: null
total_reward: null
state: null
action: null
round: 0


# Base directory interpolated into hydra.run.dir.
# NOTE(review): left unset here — presumably supplied via an override; confirm.
output_dir: null

# Applied on top of the logger config selected in the defaults list.
logger:
  group: FL

hydra:
  run:
    # Per-run working directory, stamped with the date and time (to the minute).
    dir: ${output_dir}/transfer_exp/${now:%Y-%m-%d_%H-%M}