# Input/output locations. NOTE: both values below are relative paths.
# NOTE(review): presumably resolved against the working directory of the
# script that loads this file — confirm.
# Location of the agent population (a directory, judging by the trailing slash).
population_path: 'random_agents/'
# Output file. NOTE(review): the name suggests a pickled cross-play matrix is
# written here — confirm against the consuming script.
save_path: './cross_play_matrix.pickle'
# Agents taking part in the evaluation, keyed by display label.
# Each value is either an integer (the "RB" entries) or an absolute path to a
# saved `.agent` file (the "SAD" entries).
# NOTE(review): the integers mirror the `seed=N` part of their labels and are
# presumably seeds — confirm against the consuming script.
# NOTE(review): the "SAD seed=3" path uses different directory names than
# seeds 1-2 (no `-PredDictItemDetaching` suffix, `RNNStatesPostProcess` with
# an extra "s") — confirm that this path exists as written.
agents:
  "action-only RB seed=1": 1
  # "action-only RB seed=2": 2
  # "action-only RB seed=3": 3
  "comm. RB seed=1": 1
  "comm. RB seed=2": 2
  "comm. RB seed=3": 3
  "comm. RB seed=4": 4
  "comm. RB seed=5": 5
  "comm. RB seed=6": 6
  "comm. RB seed=7": 7
  "SAD seed=1": "/home/kevin/debug_ray/r2d2_comaze_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess-PredDictItemDetaching/SEED1/reloadFromSEED1WithRNNStatePostProcess/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  "SAD seed=2": "/home/kevin/debug_ray/r2d2_comaze_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess-PredDictItemDetaching/SEED1/reloadFromSEED2WithRNNStatePostProcess/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  "SAD seed=3": "/home/kevin/debug_ray/r2d2_comaze_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess/SEED1/reloadFromSEED3WithRNNStatesPostProcess/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  # "OP seed=1": "/home/kevin/debug_ray/r2d2_comaze_data/CSGPU2_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess-PredDictItemDetaching/SEED1/reloadFromSEED1/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  # "OP seed=2": "/home/kevin/debug_ray/r2d2_comaze_data/CSGPU2_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess-PredDictItemDetaching/SEED1/reloadFromSEED2/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  # "OP seed=3": "/home/kevin/debug_ray/r2d2_comaze_data/CSGPU2_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv100/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TESTING/PUBSUB/GoalOrderingPred-NoDropout+RulesPrediction+BigArch+RNNStatePostProcess-PredDictItemDetaching/SEED1/reloadFromSEED3/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
  "SAD+ seed=1": "/home/kevin/debug_ray/r2d2_comaze_data/CoMaze-9x9-Dense-Level5-UniformSecrets-v0/100MaxSteps/PenalizeSecretGoalRuleBreaching-1-Reward1-Vocab20/venv64/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone/TRAINING/PUBSUB/GoalOrderingPred+Biasing-1m0-NoDropout+RulesPredictionONLY+RNNStatePostProcess+AugmentedHiddenStates/SEED1/testBiasingForRulesPredictionONLY/3step_SAD_VDN_aID_1m3Ent_r2d2_AdamLR6d25m5_EPS1m12_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_A2m0_gamma997_LargeCNN_MLPLSTM2Res_GradClip5m0_r1p5Min3e4_a9m1_b6m1_ovrN_e9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0_NOZeroInitSt_OnlineSt_StoreOnDone.agent"
# NOTE(review): presumably the number of games played for each pairing of
# agents — confirm against the consuming script.
num_games_per_matchup: 10
# NOTE(review): presumably how many cross-play matrices are produced — confirm.
num_matrices: 1

# NOTE(review): presumably the random seed for the evaluation run — confirm.
seed: 111
# Task settings passed to the consumer as a single mapping.
# NOTE(review): how each key is interpreted is not visible in this file; the
# key names are kept exactly as the consumer expects them.
task:
  env-id: 'CoMaze-9x9-Dense-Level5-UniformSecrets-v0'

  run-id: 'serial/crossplay/WithBN/ScalingFN_EPS1m3/Seed1_venv64_r2d2_EntropyReg0_WeightDecayReg0/'
  agent-id: 'SAD_SAD_paper_3step_r2d2_AdamLR6d25m5_EPS1d5m5_L2AModelUpdate1Steps_EPSgreedyAPEX1m0_4m1OVER3p4_gamma997_LargeCNNLSTM_GradClip5m1_r5p4Min2e4_alpha9m1_beta6m1_over2e4_eta9m1_tau4m4_RepP1_NOBURNIN_b128_L20_O10_B0'

  nbr_actor: 64
  sad: true
  clip_reward: false
  previous_reward_action: true
  # Written as a YAML sequence. The Python-style tuple `(56,56)` is not YAML:
  # inside a flow mapping the comma splits it into the string "(56" plus a
  # stray key "56)" with a null value. The consumer now receives a two-element
  # list; convert it to a tuple on the consumer side if one is required.
  observation_resize_dim: [56, 56]
