import torch
from offlinerl.utils.exp import select_free_cuda

# Task identification values read by the rest of the project.
task = "Hopper-v3"
task_data_type = "low"
task_train_num = 99

seed = 42

# Device selection. The configuration prefers the second GPU ("cuda:1"), but
# that index only exists when at least two CUDA devices are visible. On a
# single-GPU host fall back to "cuda:0" instead of naming a device that is
# not there; without CUDA, run on the CPU.
if not torch.cuda.is_available():
    device = "cpu"
elif torch.cuda.device_count() > 1:
    device = "cuda:1"
else:
    device = "cuda:0"

# Left unset in this file; presumably filled in at runtime from the
# environment/dataset -- TODO confirm against the code that loads this config.
obs_shape = None
act_shape = None
max_action = None

# Layer counts and widths.
hidden_layers = 2
hidden_layer_size = 256
transition_layers = 4
layer_num = 5

# Transition-model counts (how many are initialised vs. selected, judging by
# the names -- confirm against the algorithm code).
transition_select_num = 5
transition_init_num = 7

real_data_ratio = 0.5

# Batch sizes.
policy_batch_size = 256
transition_batch_size = 256

# Training-loop and buffer sizes. The trailing notes record the alternative
# values the original author left next to each setting.
max_epoch = 1000
steps_per_epoch = 1000
buffer_size = 20000  # alternative noted by the author: 1.2e6
data_collection_per_epoch = 1000  # alternative noted by the author: 50e3

# Entropy / alpha switches.
use_automatic_entropy_tuning = True
learnable_alpha = True
target_entropy = None

uncertainty_mode = "aleatoric"

# Learning rates for the transition model, the actor and the critic.
transition_lr = 0.001
actor_lr = 0.0001
critic_lr = 0.0003

# Reward scaling, discount factor and soft target-update coefficient.
reward_scale = 1
discount = 0.99
soft_target_tau = 0.005

# Rollout horizon and `lam` coefficient (both also appear in the tuning
# search spaces defined in this file).
horizon = 5
lam = 2

# --- min-Q settings ---
min_q_version = 3
min_q_weight = 1.0
temp = 1.0
explore = 1.0

# --- lagrange settings ---
with_lagrange = False
lagrange_thresh = 10.0

# --- additional parameters ---
num_random = 10
type_q_backup = "min"
q_backup_lmbda = 0.75
deterministic_backup = False

discrete = False

# Search space for parameter tuning.
# Each entry maps a setting name to a spec with a "type" ("discrete" or
# "continuous") and a "value" list. For "discrete" the list enumerates the
# candidates; for "continuous" it presumably gives [low, high] bounds --
# confirm against the tuner that consumes this dict.
# The keys must match the module-level setting names exactly: the horizon
# entry was previously misspelled "horzion", which does not correspond to
# any setting in this file.
params_tune = {
    "buffer_size": {"type": "discrete", "value": [500, 1000]},  # author's note: 1e6, 2e6
    "real_data_ratio": {"type": "discrete", "value": [0.05, 0.1, 0.2]},
    "horizon": {"type": "discrete", "value": [1, 2, 5]},
    "lam": {"type": "continuous", "value": [0.1, 10]},
    "learnable_alpha": {"type": "discrete", "value": [True, False]},
    "actor_lr": {"type": "discrete", "value": [1e-4, 3e-4]},
    "min_q_version": {"type": "discrete", "value": [2, 3]},
    "min_q_weight": {"type": "discrete", "value": [5, 10]},
    "lagrange_thresh": {"type": "discrete", "value": [-1, 2, 5, 10]},
    "type_q_backup": {"type": "discrete", "value": ["max", "none"]},
}

# Grid-search space: each setting name maps to the list of values to sweep.
grid_tune = dict(
    horizon=[1, 5],
    lam=[0.5, 1, 2, 5],
    uncertainty_mode=["aleatoric", "disagreement"],
    min_q_version=[2, 3],
    min_q_weight=[5, 10],
    lagrange_thresh=[-1],  # wider sweep left disabled by the author: [-1, 2, 5, 10]
    type_q_backup=["min", "none"],
)