from copy import deepcopy

from .common import common_params

# Experiment configuration. Work on an independent deep copy of the shared
# defaults so that none of the overrides below mutate `common_params`.
params = deepcopy(common_params)

# Top-level run settings. The experiment name spells out several of the
# hyperparameter values set further down (min_q_weight, lagrange_thresh,
# temp, max_q_backup, lr).
params.update(dict(
    type='MultitaskCQL',
    domain='antmaze',
    task='multitask-large-play-v0',
    log_dir='logs/',
    exp_name='antmaze_multitask_large_play_cql_minq_5.0_lagrange_10.0_temp1.0_max_q_backup_lr1e-4_relabel_all_cuds',
))

# Everything below overrides entries of the nested 'kwargs' dict. The
# overrides are applied group by group, in the same key order throughout.

# Epoch / evaluation schedule and learning rates.
params['kwargs'].update(dict(
    epoch_length=1000,
    eval_n_episodes=3 * 7,  # presumably 3 episodes for each of the 7 tasks -- confirm
    lr=1e-4,
    q_lr=3e-4,
))

# "min Q" settings.
params['kwargs'].update(dict(
    with_min_q=True,
    min_q_for_real=False,
    min_q_for_real_and_fake=False,
    min_q_for_fake_only=False,
    temp=1.0,
    min_q_version=3,
    min_q_weight=5.0,
    data_subtract=True,
))

# Extra backup-related settings.
params['kwargs'].update(dict(
    num_random=10,
    max_q_backup=True,
    deterministic_backup=False,
))

# Lagrange settings.
params['kwargs'].update(dict(
    with_lagrange=True,
    lagrange_thresh=10.0,
))

# Model / rollout settings.
params['kwargs'].update(dict(
    model_train_freq=1000,
    model_retain_epochs=10,
    rollout_batch_size=10e3,  # float literal (10000.0), not an int
    deterministic=False,
    rollout_random=False,
    num_networks=7,
    num_elites=5,
    real_ratio=1.0,
    target_entropy=-3,
    max_model_t=None,
    rollout_length=1,
))

# Multitask relabeling settings.
params['kwargs'].update(dict(
    num_tasks=7,
    use_relabel_weights=True,
    use_relabel_weights_diff=False,
    relabel_balance_batch=True,
    relabel_weight_temp=10.0,
    relabel_weight_temp_adaptive=True,
    relabel_weight_temp_tau=0.995,
    relabel_weight_temp_min=10.0,
    relabel_weight_temp_max=100.0,
    relabel_weight_orig_task=True,
))

# Remaining model flags, penalty, data-pool location and multitask mode.
# NOTE(review): 'model_name' contains "sn_smv" while 'sn' and
# 'separate_mean_var' are both False here -- confirm this is intended.
params['kwargs'].update(dict(
    sn=False,
    separate_mean_var=False,
    penalty_learned_var=False,
    penalty_coeff=0.0,
    pool_load_path='d4rl/antmaze-large-play-v0',
    pool_load_max_size=8 * 10**6,
    model_name='antmaze_multitask_large_play_sn_smv_1_0',
    multitask_type='relabel-all',
))