{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Warning: Flow failed to import. Set the environment variable D4RL_SUPPRESS_IMPORT_ERROR=1 to suppress this message.\n",
      "No module named 'flow'\n",
      "Warning: CARLA failed to import. Set the environment variable D4RL_SUPPRESS_IMPORT_ERROR=1 to suppress this message.\n",
      "No module named 'carla'\n"
     ]
    }
   ],
   "source": [
    "import d3rlpy\n",
    "import torch\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from data.makeSeaquestdata import load_seaquest_dataset, make_seaquest_testset\n",
    "from data.makeHalfcheetahdata import load_halfcheetah_dataset, make_halfcheetah_testset\n",
    "from load_model import load_seaquest_model, load_halfcheetah_model\n",
    "from utils import create_trajectories, get_trajectory_embedding, perform_clustering_and_plot, trajectory_attributions_hc, set_device_seeding, print_results_hc\n",
    "from model import make_episodes, fit_discrete_sac, fit_per_cluster, fit_sac\n",
    "from encoder import CustomCNNFactory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `set_device_seeding` is a project helper imported from utils; its return value is\n",
    "# kept as `device` for later cells.\n",
    "# NOTE(review): presumably it also seeds the RNGs with `seed` -- confirm in utils.\n",
    "SEED = 0\n",
    "device = set_device_seeding(seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\tgome\\Desktop\\FACT\\factvenv\\lib\\site-packages\\gym\\spaces\\box.py:84: UserWarning: \u001b[33mWARN: Box bound precision lowered by casting to float32\u001b[0m\n",
      "  logger.warn(f\"Box bound precision lowered by casting to {self.dtype}\")\n",
      "load datafile: 100%|██████████| 21/21 [00:02<00:00,  7.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset extracted with 50000 samples.\n",
      "Information about the dataset:\n",
      "Observation shape:  (50000, 17)\n",
      "Action shape:  (50000, 6)\n",
      "Reward shape:  (50000,)\n",
      "Action space:  (6,)\n",
      "Observation space:  (17,)\n",
      "Environment 'halfcheetah-medium-v2' initialized.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing sub'trajectories: 100%|█████████▉| 49899/49911 [00:05<00:00, 8450.51it/s] \n",
      "Processing trajectories:   0%|          | 0/1996 [00:00<?, ?it/s]c:\\Users\\tgome\\Desktop\\FACT\\factvenv\\lib\\site-packages\\torch\\cuda\\amp\\autocast_mode.py:114: UserWarning: torch.cuda.amp.autocast only affects CUDA ops, but CUDA is not available.  Disabling.\n",
      "  warnings.warn(\"torch.cuda.amp.autocast only affects CUDA ops, but CUDA is not available.  Disabling.\")\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Halfcheetah data obtained!\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing trajectories: 100%|██████████| 1996/1996 [01:15<00:00, 26.42it/s]\n"
     ]
    }
   ],
   "source": [
    "# Settings for the HalfCheetah pipeline, gathered in one place so they can be tuned\n",
    "# without hunting through the calls below.\n",
    "HC_ENV_NAME = 'halfcheetah-medium-v2'\n",
    "HC_SEED = 0\n",
    "HC_DATASET_SIZE = 500 * 100\n",
    "HC_TEST_SIZE = 200\n",
    "HC_TRAJECTORY_LENGTH = 25\n",
    "HC_ENCODER_DIR = \"trajectory_transformer/logs/halfcheetah-medium-v2/gpt/pretrained\"\n",
    "\n",
    "# Load the dataset together with its environment, then the encoder checkpoint.\n",
    "halfcheetah_full, hc_env = load_halfcheetah_dataset(\n",
    "    env_name=HC_ENV_NAME, size=HC_DATASET_SIZE, seed=HC_SEED\n",
    ")\n",
    "pre_trained_encoder_halfcheetah = load_halfcheetah_model(HC_ENCODER_DIR, seed=HC_SEED)\n",
    "\n",
    "# The full dataset keeps its own name so the split result does not overwrite it.\n",
    "# NOTE(review): presumably this holds out HC_TEST_SIZE test observations and returns\n",
    "# the remaining data as `halfcheetahdata` -- confirm in data.makeHalfcheetahdata.\n",
    "halfcheetahdata, test_observation_hc = make_halfcheetah_testset(\n",
    "    halfcheetah_full, test_size=HC_TEST_SIZE\n",
    ")\n",
    "\n",
    "# HalfCheetah data: build trajectories of HC_TRAJECTORY_LENGTH steps from the four arrays.\n",
    "final_obs_hc, final_act_hc, final_rew_hc, final_ter_hc = create_trajectories(\n",
    "    halfcheetahdata[\"observations\"],\n",
    "    halfcheetahdata[\"actions\"],\n",
    "    halfcheetahdata[\"rewards\"],\n",
    "    halfcheetahdata[\"terminals\"],\n",
    "    trajectory_length=HC_TRAJECTORY_LENGTH,\n",
    ")\n",
    "\n",
    "print('Halfcheetah data obtained!')\n",
    "\n",
    "# Embed the trajectories with the loaded encoder on `device` (set in an earlier cell).\n",
    "trajectory_embedding_halfcheetah = get_trajectory_embedding(\n",
    "    pre_trained_encoder_halfcheetah,\n",
    "    final_obs_hc,\n",
    "    final_act_hc,\n",
    "    final_rew_hc,\n",
    "    is_seaquest=False,\n",
    "    device=device,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Detach the embeddings from the autograd graph, move them to the CPU and convert\n",
    "# them to a NumPy array before handing them to the clustering helper.\n",
    "embedding_array_hc = trajectory_embedding_halfcheetah.detach().cpu().numpy()\n",
    "\n",
    "# The first returned value is kept as `clusters_halfcheetah`; the second is discarded.\n",
    "# NOTE(review): the two positional 10s are passed through unchanged -- their meaning\n",
    "# is defined by utils.perform_clustering_and_plot.\n",
    "clusters_halfcheetah, _ = perform_clustering_and_plot(\n",
    "    embedding_array_hc,\n",
    "    10,\n",
    "    10,\n",
    "    ccore=True,\n",
    "    plot=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.56 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:28.56 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002856\n",
      "2024-02-02 00:28.56 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:28.56 [debug    ] Building models...\n",
      "2024-02-02 00:28.56 [debug    ] Models have been built.\n",
      "2024-02-02 00:28.56 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002856\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 15.63it/s, temp_loss=9.99, temp=1, critic_loss=147, actor_loss=-4.02]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.56 [info     ] SAC_20240202002856: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0027399301528930665, 'time_algorithm_update': 0.0596752405166626, 'temp_loss': 10.039063358306885, 'temp': 0.9983514606952667, 'critic_loss': 143.48310546875, 'actor_loss': -4.2723876476287845, 'time_step': 0.06284396648406983} step=10\n",
      "2024-02-02 00:28.57 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 16.54it/s, temp_loss=10.1, temp=0.997, critic_loss=135, actor_loss=-4.51]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.57 [info     ] SAC_20240202002856: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.00291903018951416, 'time_algorithm_update': 0.0559603214263916, 'temp_loss': 10.055249500274659, 'temp': 0.9953596413135528, 'critic_loss': 127.53994522094726, 'actor_loss': -4.741487741470337, 'time_step': 0.05932912826538086} step=20\n",
      "2024-02-02 00:28.57 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 13.03it/s, temp_loss=10, temp=0.994, critic_loss=115, actor_loss=-5.03]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.58 [info     ] SAC_20240202002856: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.004182887077331543, 'time_algorithm_update': 0.07043371200561524, 'temp_loss': 10.01520071029663, 'temp': 0.9923777759075165, 'critic_loss': 104.02678070068359, 'actor_loss': -5.330809259414673, 'time_step': 0.07528128623962402} step=30\n",
      "2024-02-02 00:28.58 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 14.02it/s, temp_loss=10, temp=0.991, critic_loss=89.4, actor_loss=-5.74]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.59 [info     ] SAC_20240202002856: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0037098169326782227, 'time_algorithm_update': 0.06479361057281494, 'temp_loss': 9.973664951324462, 'temp': 0.9894090235233307, 'critic_loss': 77.94114608764649, 'actor_loss': -6.164034175872803, 'time_step': 0.06977982521057129} step=40\n",
      "2024-02-02 00:28.59 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 13.81it/s, temp_loss=9.97, temp=0.988, critic_loss=59.5, actor_loss=-6.72]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:28.59 [info     ] SAC_20240202002856: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.003196525573730469, 'time_algorithm_update': 0.06812136173248291, 'temp_loss': 9.951050281524658, 'temp': 0.9864546418190002, 'critic_loss': 46.559478759765625, 'actor_loss': -7.245564317703247, 'time_step': 0.07167000770568847} step=50\n",
      "2024-02-02 00:28.59 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 15.74it/s, temp_loss=9.91, temp=0.985, critic_loss=35.3, actor_loss=-7.94]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.00 [info     ] SAC_20240202002856: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0029449224472045898, 'time_algorithm_update': 0.05928125381469727, 'temp_loss': 9.907215595245361, 'temp': 0.9835139095783234, 'critic_loss': 23.716598892211913, 'actor_loss': -8.609836626052857, 'time_step': 0.06257815361022949} step=60\n",
      "2024-02-02 00:29.00 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 16.11it/s, temp_loss=9.91, temp=0.982, critic_loss=13.9, actor_loss=-9.34]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.01 [info     ] SAC_20240202002856: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.003552103042602539, 'time_algorithm_update': 0.05765695571899414, 'temp_loss': 9.830208778381348, 'temp': 0.9805884957313538, 'critic_loss': 10.040040588378906, 'actor_loss': -10.062280654907227, 'time_step': 0.061369895935058594} step=70\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.01 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 17.60it/s, temp_loss=9.78, temp=0.979, critic_loss=7.35, actor_loss=-10.8]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.01 [info     ] SAC_20240202002856: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0028636932373046877, 'time_algorithm_update': 0.05210764408111572, 'temp_loss': 9.658223247528076, 'temp': 0.9776878833770752, 'critic_loss': 7.841440916061401, 'actor_loss': -11.114001750946045, 'time_step': 0.05544257164001465} step=80\n",
      "2024-02-02 00:29.01 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 20.55it/s, temp_loss=9.58, temp=0.976, critic_loss=8.41, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.02 [info     ] SAC_20240202002856: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0027684926986694335, 'time_algorithm_update': 0.04452013969421387, 'temp_loss': 9.491847515106201, 'temp': 0.9748245596885681, 'critic_loss': 8.401556873321534, 'actor_loss': -11.304387092590332, 'time_step': 0.04743669033050537} step=90\n",
      "2024-02-02 00:29.02 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 19.91it/s, temp_loss=9.38, temp=0.973, critic_loss=7.29, actor_loss=-11.1]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.02 [info     ] SAC_20240202002856: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0027948617935180664, 'time_algorithm_update': 0.04662342071533203, 'temp_loss': 9.470063877105712, 'temp': 0.9719967544078827, 'critic_loss': 7.862465763092041, 'actor_loss': -11.03729181289673, 'time_step': 0.04969944953918457} step=100\n",
      "2024-02-02 00:29.02 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002856\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 0 has 336 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.07 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.08 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002907\n",
      "2024-02-02 00:29.08 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.08 [debug    ] Building models...\n",
      "2024-02-02 00:29.08 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.08 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002907\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 19.70it/s, temp_loss=10, temp=1, critic_loss=151, actor_loss=-4.04]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.08 [info     ] SAC_20240202002907: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0023681640625, 'time_algorithm_update': 0.047631573677062986, 'temp_loss': 10.046057319641113, 'temp': 0.9983515739440918, 'critic_loss': 143.7070281982422, 'actor_loss': -4.245231056213379, 'time_step': 0.05034966468811035} step=10\n",
      "2024-02-02 00:29.08 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 20.32it/s, temp_loss=10, temp=0.997, critic_loss=131, actor_loss=-4.54]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.09 [info     ] SAC_20240202002907: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0021006107330322266, 'time_algorithm_update': 0.04610750675201416, 'temp_loss': 10.051844882965089, 'temp': 0.9953602194786072, 'critic_loss': 126.34270477294922, 'actor_loss': -4.732918643951416, 'time_step': 0.048407578468322755} step=20\n",
      "2024-02-02 00:29.09 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 19.98it/s, temp_loss=10, temp=0.994, critic_loss=117, actor_loss=-4.98]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.09 [info     ] SAC_20240202002907: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.002312898635864258, 'time_algorithm_update': 0.04562666416168213, 'temp_loss': 10.019910049438476, 'temp': 0.9923787355422974, 'critic_loss': 108.13650817871094, 'actor_loss': -5.299998188018799, 'time_step': 0.04793956279754639} step=30\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.09 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 15.80it/s, temp_loss=9.99, temp=0.991, critic_loss=91.8, actor_loss=-5.69]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.10 [info     ] SAC_20240202002907: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.003082084655761719, 'time_algorithm_update': 0.05859348773956299, 'temp_loss': 9.991365337371827, 'temp': 0.9894099950790405, 'critic_loss': 81.57457885742187, 'actor_loss': -6.09074444770813, 'time_step': 0.06211729049682617} step=40\n",
      "2024-02-02 00:29.10 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 18.22it/s, temp_loss=9.97, temp=0.988, critic_loss=70.8, actor_loss=-6.62]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.10 [info     ] SAC_20240202002907: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0028115034103393553, 'time_algorithm_update': 0.050984430313110354, 'temp_loss': 9.943325805664063, 'temp': 0.9864543974399567, 'critic_loss': 56.34404411315918, 'actor_loss': -7.099079275131226, 'time_step': 0.05397789478302002} step=50\n",
      "2024-02-02 00:29.10 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 22.54it/s, temp_loss=9.92, temp=0.985, critic_loss=39.7, actor_loss=-7.78]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.11 [info     ] SAC_20240202002907: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0022650957107543945, 'time_algorithm_update': 0.04151682853698731, 'temp_loss': 9.906036758422852, 'temp': 0.9835141837596894, 'critic_loss': 30.130842399597167, 'actor_loss': -8.353482055664063, 'time_step': 0.04406051635742188} step=60\n",
      "2024-02-02 00:29.11 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 17.86it/s, temp_loss=9.86, temp=0.982, critic_loss=18.3, actor_loss=-9.06]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.11 [info     ] SAC_20240202002907: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0023616790771484376, 'time_algorithm_update': 0.05233685970306397, 'temp_loss': 9.840134620666504, 'temp': 0.9805899441242218, 'critic_loss': 13.323640441894531, 'actor_loss': -9.706374454498292, 'time_step': 0.055194973945617676} step=70\n",
      "2024-02-02 00:29.11 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 21.54it/s, temp_loss=9.74, temp=0.979, critic_loss=8.4, actor_loss=-10.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.12 [info     ] SAC_20240202002907: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0023700952529907226, 'time_algorithm_update': 0.042787933349609376, 'temp_loss': 9.737698650360107, 'temp': 0.9776856184005738, 'critic_loss': 7.65612907409668, 'actor_loss': -10.827130889892578, 'time_step': 0.04575676918029785} step=80\n",
      "2024-02-02 00:29.12 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 19.72it/s, temp_loss=9.61, temp=0.976, critic_loss=8.73, actor_loss=-11.3]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.12 [info     ] SAC_20240202002907: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0028813362121582033, 'time_algorithm_update': 0.04691059589385986, 'temp_loss': 9.557931518554687, 'temp': 0.974809992313385, 'critic_loss': 8.269952154159546, 'actor_loss': -11.339824295043945, 'time_step': 0.04996490478515625} step=90\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.12 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 21.95it/s, temp_loss=9.47, temp=0.973, critic_loss=8.18, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.13 [info     ] SAC_20240202002907: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.002331376075744629, 'time_algorithm_update': 0.04193449020385742, 'temp_loss': 9.463914775848389, 'temp': 0.9719730913639069, 'critic_loss': 8.089806413650512, 'actor_loss': -11.19092617034912, 'time_step': 0.045124387741088866} step=100\n",
      "2024-02-02 00:29.13 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002907\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 1 has 149 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.15 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.15 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002915\n",
      "2024-02-02 00:29.15 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.15 [debug    ] Building models...\n",
      "2024-02-02 00:29.15 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.15 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002915\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 20.28it/s, temp_loss=10, temp=1, critic_loss=151, actor_loss=-3.93]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.15 [info     ] SAC_20240202002915: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0018758535385131835, 'time_algorithm_update': 0.046067237854003906, 'temp_loss': 10.053455924987793, 'temp': 0.9983515322208405, 'critic_loss': 143.0130401611328, 'actor_loss': -4.2223129034042355, 'time_step': 0.0485992431640625} step=10\n",
      "2024-02-02 00:29.15 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 20.74it/s, temp_loss=10.1, temp=0.997, critic_loss=136, actor_loss=-4.56]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.16 [info     ] SAC_20240202002915: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.002628922462463379, 'time_algorithm_update': 0.045191168785095215, 'temp_loss': 10.040992927551269, 'temp': 0.995360255241394, 'critic_loss': 126.70473403930664, 'actor_loss': -4.739901351928711, 'time_step': 0.047919845581054686} step=20\n",
      "2024-02-02 00:29.16 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 17.73it/s, temp_loss=10, temp=0.994, critic_loss=118, actor_loss=-5.09]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.16 [info     ] SAC_20240202002915: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0028862714767456054, 'time_algorithm_update': 0.0525087833404541, 'temp_loss': 10.020684337615966, 'temp': 0.9923798203468323, 'critic_loss': 103.34262161254883, 'actor_loss': -5.376384496688843, 'time_step': 0.05559451580047607} step=30\n",
      "2024-02-02 00:29.16 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 17.85it/s, temp_loss=9.99, temp=0.991, critic_loss=86.1, actor_loss=-5.87]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.17 [info     ] SAC_20240202002915: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.002909421920776367, 'time_algorithm_update': 0.05151536464691162, 'temp_loss': 9.970125007629395, 'temp': 0.9894120037555695, 'critic_loss': 74.83494415283204, 'actor_loss': -6.258380270004272, 'time_step': 0.05467233657836914} step=40\n",
      "2024-02-02 00:29.17 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 18.42it/s, temp_loss=9.99, temp=0.988, critic_loss=60.3, actor_loss=-6.84]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.17 [info     ] SAC_20240202002915: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0029439449310302733, 'time_algorithm_update': 0.05009722709655762, 'temp_loss': 9.945212078094482, 'temp': 0.9864582598209382, 'critic_loss': 46.55029220581055, 'actor_loss': -7.376073789596558, 'time_step': 0.053387451171875} step=50\n",
      "2024-02-02 00:29.17 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 21.67it/s, temp_loss=9.88, temp=0.985, critic_loss=29.4, actor_loss=-8.18]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.18 [info     ] SAC_20240202002915: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0025994062423706056, 'time_algorithm_update': 0.042400789260864255, 'temp_loss': 9.860677146911621, 'temp': 0.9835196018218995, 'critic_loss': 21.599127960205077, 'actor_loss': -8.740722274780273, 'time_step': 0.04520752429962158} step=60\n",
      "2024-02-02 00:29.18 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 20.89it/s, temp_loss=9.83, temp=0.982, critic_loss=12, actor_loss=-9.52]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.18 [info     ] SAC_20240202002915: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0028120756149291994, 'time_algorithm_update': 0.043947792053222655, 'temp_loss': 9.74701805114746, 'temp': 0.9806029438972473, 'critic_loss': 9.283820819854736, 'actor_loss': -10.093687343597413, 'time_step': 0.0468595027923584} step=70\n",
      "2024-02-02 00:29.18 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 22.45it/s, temp_loss=9.71, temp=0.979, critic_loss=8.13, actor_loss=-10.8]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.19 [info     ] SAC_20240202002915: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0025658607482910156, 'time_algorithm_update': 0.04113919734954834, 'temp_loss': 9.550851917266845, 'temp': 0.9777163445949555, 'critic_loss': 7.591020822525024, 'actor_loss': -11.014001560211181, 'time_step': 0.0439051628112793} step=80\n",
      "2024-02-02 00:29.19 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 19.76it/s, temp_loss=9.42, temp=0.976, critic_loss=9.01, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.19 [info     ] SAC_20240202002915: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0028194189071655273, 'time_algorithm_update': 0.046510505676269534, 'temp_loss': 9.389086437225341, 'temp': 0.9748726606369018, 'critic_loss': 7.570687913894654, 'actor_loss': -11.160945987701416, 'time_step': 0.04986131191253662} step=90\n",
      "2024-02-02 00:29.19 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 19.66it/s, temp_loss=9.41, temp=0.973, critic_loss=8.09, actor_loss=-11.1]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.20 [info     ] SAC_20240202002915: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0016872644424438476, 'time_algorithm_update': 0.04818646907806397, 'temp_loss': 9.439821147918702, 'temp': 0.972059690952301, 'critic_loss': 7.5951231002807615, 'actor_loss': -10.976404857635497, 'time_step': 0.050173473358154294} step=100\n",
      "2024-02-02 00:29.20 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002915\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 2 has 172 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.22 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.22 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002922\n",
      "2024-02-02 00:29.22 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.22 [debug    ] Building models...\n",
      "2024-02-02 00:29.22 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.22 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002922\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 22.03it/s, temp_loss=10, temp=1, critic_loss=154, actor_loss=-4.01]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.22 [info     ] SAC_20240202002922: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0017347335815429688, 'time_algorithm_update': 0.04260263442993164, 'temp_loss': 10.01761598587036, 'temp': 0.9983517587184906, 'critic_loss': 143.59225616455078, 'actor_loss': -4.297691059112549, 'time_step': 0.04493577480316162} step=10\n",
      "2024-02-02 00:29.22 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 21.57it/s, temp_loss=10, temp=0.997, critic_loss=133, actor_loss=-4.57]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.23 [info     ] SAC_20240202002922: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0024156570434570312, 'time_algorithm_update': 0.0423774242401123, 'temp_loss': 10.051449871063232, 'temp': 0.9953596472740174, 'critic_loss': 126.5684440612793, 'actor_loss': -4.79937481880188, 'time_step': 0.04606673717498779} step=20\n",
      "2024-02-02 00:29.23 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 20.73it/s, temp_loss=10, temp=0.994, critic_loss=118, actor_loss=-5.11]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.23 [info     ] SAC_20240202002922: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.002505326271057129, 'time_algorithm_update': 0.04480564594268799, 'temp_loss': 10.010359477996825, 'temp': 0.9923767626285553, 'critic_loss': 103.93856811523438, 'actor_loss': -5.416124105453491, 'time_step': 0.047587800025939944} step=30\n",
      "2024-02-02 00:29.23 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 15.37it/s, temp_loss=10, temp=0.991, critic_loss=87.2, actor_loss=-5.8]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.24 [info     ] SAC_20240202002922: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0034429311752319338, 'time_algorithm_update': 0.05974862575531006, 'temp_loss': 9.97573356628418, 'temp': 0.9894068896770477, 'critic_loss': 76.30624008178711, 'actor_loss': -6.237654447555542, 'time_step': 0.06345818042755128} step=40\n",
      "2024-02-02 00:29.24 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 20.28it/s, temp_loss=9.94, temp=0.988, critic_loss=58.7, actor_loss=-6.88]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.25 [info     ] SAC_20240202002922: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0024984598159790037, 'time_algorithm_update': 0.045667266845703124, 'temp_loss': 9.942697048187256, 'temp': 0.9864508092403412, 'critic_loss': 47.87314643859863, 'actor_loss': -7.375675630569458, 'time_step': 0.04847784042358398} step=50\n",
      "2024-02-02 00:29.25 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 21.73it/s, temp_loss=9.96, temp=0.985, critic_loss=34.6, actor_loss=-8.08]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.25 [info     ] SAC_20240202002922: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0032727718353271484, 'time_algorithm_update': 0.041556143760681154, 'temp_loss': 9.899717044830322, 'temp': 0.9835090517997742, 'critic_loss': 23.722896766662597, 'actor_loss': -8.699378871917725, 'time_step': 0.04562292098999023} step=60\n",
      "2024-02-02 00:29.25 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 23.64it/s, temp_loss=9.89, temp=0.982, critic_loss=13.5, actor_loss=-9.48]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.25 [info     ] SAC_20240202002922: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.002159905433654785, 'time_algorithm_update': 0.039609670639038086, 'temp_loss': 9.795590019226074, 'temp': 0.9805848598480225, 'critic_loss': 9.575812196731567, 'actor_loss': -10.10691041946411, 'time_step': 0.04189069271087646} step=70\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.25 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 22.41it/s, temp_loss=9.73, temp=0.979, critic_loss=7.81, actor_loss=-10.7]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.26 [info     ] SAC_20240202002922: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.002548694610595703, 'time_algorithm_update': 0.04128851890563965, 'temp_loss': 9.665735912322997, 'temp': 0.9776874899864196, 'critic_loss': 7.997067165374756, 'actor_loss': -11.155108451843262, 'time_step': 0.04404768943786621} step=80\n",
      "2024-02-02 00:29.26 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 19.77it/s, temp_loss=9.54, temp=0.976, critic_loss=8, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.26 [info     ] SAC_20240202002922: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0027854442596435547, 'time_algorithm_update': 0.04681236743927002, 'temp_loss': 9.540342426300048, 'temp': 0.9748222351074218, 'critic_loss': 8.427433919906616, 'actor_loss': -11.355672359466553, 'time_step': 0.0496976375579834} step=90\n",
      "2024-02-02 00:29.26 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 22.07it/s, temp_loss=9.53, temp=0.973, critic_loss=8.19, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.27 [info     ] SAC_20240202002922: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0025681257247924805, 'time_algorithm_update': 0.041550207138061526, 'temp_loss': 9.52704315185547, 'temp': 0.9719830930233002, 'critic_loss': 7.476691722869873, 'actor_loss': -11.106181335449218, 'time_step': 0.04432346820831299} step=100\n",
      "2024-02-02 00:29.27 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002922\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 3 has 316 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.29 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.29 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002929\n",
      "2024-02-02 00:29.29 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.29 [debug    ] Building models...\n",
      "2024-02-02 00:29.29 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.29 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002929\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 23.33it/s, temp_loss=9.95, temp=1, critic_loss=144, actor_loss=-3.98]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.29 [info     ] SAC_20240202002929: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0018372535705566406, 'time_algorithm_update': 0.04042935371398926, 'temp_loss': 10.054652404785156, 'temp': 0.9983511447906495, 'critic_loss': 142.4325439453125, 'actor_loss': -4.261580896377564, 'time_step': 0.04246838092803955} step=10\n",
      "2024-02-02 00:29.29 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 23.29it/s, temp_loss=10.1, temp=0.997, critic_loss=133, actor_loss=-4.57]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.29 [info     ] SAC_20240202002929: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0019794702529907227, 'time_algorithm_update': 0.039885425567626955, 'temp_loss': 10.05694284439087, 'temp': 0.9953592360019684, 'critic_loss': 124.63119812011719, 'actor_loss': -4.791972303390503, 'time_step': 0.04243628978729248} step=20\n",
      "2024-02-02 00:29.29 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 22.92it/s, temp_loss=10, temp=0.994, critic_loss=115, actor_loss=-5.09]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.30 [info     ] SAC_20240202002929: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.001972794532775879, 'time_algorithm_update': 0.040555906295776364, 'temp_loss': 10.015644645690918, 'temp': 0.9923791885375977, 'critic_loss': 104.05641860961914, 'actor_loss': -5.443577766418457, 'time_step': 0.04313158988952637} step=30\n",
      "2024-02-02 00:29.30 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 18.92it/s, temp_loss=10, temp=0.991, critic_loss=91.4, actor_loss=-5.89]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.30 [info     ] SAC_20240202002929: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.002827143669128418, 'time_algorithm_update': 0.049353170394897464, 'temp_loss': 9.994085121154786, 'temp': 0.9894116520881653, 'critic_loss': 76.67293014526368, 'actor_loss': -6.319264459609985, 'time_step': 0.05248482227325439} step=40\n",
      "2024-02-02 00:29.30 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 21.89it/s, temp_loss=9.93, temp=0.988, critic_loss=58.7, actor_loss=-6.94]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.31 [info     ] SAC_20240202002929: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0024910211563110352, 'time_algorithm_update': 0.042305827140808105, 'temp_loss': 9.942749786376954, 'temp': 0.9864576578140258, 'critic_loss': 45.915547943115236, 'actor_loss': -7.4672318458557125, 'time_step': 0.04500820636749268} step=50\n",
      "2024-02-02 00:29.31 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 20.68it/s, temp_loss=9.92, temp=0.985, critic_loss=33, actor_loss=-8.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.31 [info     ] SAC_20240202002929: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.002350473403930664, 'time_algorithm_update': 0.04506926536560059, 'temp_loss': 9.889350891113281, 'temp': 0.983517998456955, 'critic_loss': 23.224475288391112, 'actor_loss': -8.817429161071777, 'time_step': 0.047730016708374026} step=60\n",
      "2024-02-02 00:29.31 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 18.43it/s, temp_loss=9.87, temp=0.982, critic_loss=13.5, actor_loss=-9.65]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.32 [info     ] SAC_20240202002929: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0023743867874145507, 'time_algorithm_update': 0.05050809383392334, 'temp_loss': 9.77365140914917, 'temp': 0.9805972039699554, 'critic_loss': 9.50343942642212, 'actor_loss': -10.255715084075927, 'time_step': 0.05335569381713867} step=70\n",
      "2024-02-02 00:29.32 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 21.34it/s, temp_loss=9.71, temp=0.979, critic_loss=8.76, actor_loss=-10.9]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.33 [info     ] SAC_20240202002929: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0021156549453735353, 'time_algorithm_update': 0.04368810653686524, 'temp_loss': 9.58009214401245, 'temp': 0.977707439661026, 'critic_loss': 7.827134656906128, 'actor_loss': -11.227215766906738, 'time_step': 0.04611966609954834} step=80\n",
      "2024-02-02 00:29.33 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 19.16it/s, temp_loss=9.41, temp=0.976, critic_loss=7.88, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.33 [info     ] SAC_20240202002929: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0023077249526977537, 'time_algorithm_update': 0.048615241050720216, 'temp_loss': 9.465544986724854, 'temp': 0.9748592555522919, 'critic_loss': 8.401171779632568, 'actor_loss': -11.358986759185791, 'time_step': 0.051332521438598636} step=90\n",
      "2024-02-02 00:29.33 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 16.15it/s, temp_loss=9.43, temp=0.973, critic_loss=8.64, actor_loss=-11.3]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.34 [info     ] SAC_20240202002929: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0035857915878295898, 'time_algorithm_update': 0.057163023948669435, 'temp_loss': 9.498215866088866, 'temp': 0.9720348417758942, 'critic_loss': 7.735966777801513, 'actor_loss': -11.17762632369995, 'time_step': 0.06105387210845947} step=100\n",
      "2024-02-02 00:29.34 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002929\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 4 has 29 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.36 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.36 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002936\n",
      "2024-02-02 00:29.36 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.36 [debug    ] Building models...\n",
      "2024-02-02 00:29.36 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.36 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002936\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 17.70it/s, temp_loss=10, temp=1, critic_loss=146, actor_loss=-3.85]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.37 [info     ] SAC_20240202002936: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.002607893943786621, 'time_algorithm_update': 0.0528756856918335, 'temp_loss': 10.019062328338624, 'temp': 0.9983515799045563, 'critic_loss': 143.17571868896485, 'actor_loss': -4.175034904479981, 'time_step': 0.05568473339080811} step=10\n",
      "2024-02-02 00:29.37 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 18.52it/s, temp_loss=10, temp=0.997, critic_loss=136, actor_loss=-4.52]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.37 [info     ] SAC_20240202002936: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.00260469913482666, 'time_algorithm_update': 0.050455474853515626, 'temp_loss': 10.041105461120605, 'temp': 0.9953592240810394, 'critic_loss': 125.36453094482422, 'actor_loss': -4.738282203674316, 'time_step': 0.05325956344604492} step=20\n",
      "2024-02-02 00:29.37 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 18.20it/s, temp_loss=10, temp=0.994, critic_loss=111, actor_loss=-5.06]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.38 [info     ] SAC_20240202002936: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0023153543472290037, 'time_algorithm_update': 0.051008081436157225, 'temp_loss': 10.0347749710083, 'temp': 0.9923757612705231, 'critic_loss': 103.08932647705078, 'actor_loss': -5.368414306640625, 'time_step': 0.053829622268676755} step=30\n",
      "2024-02-02 00:29.38 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 21.94it/s, temp_loss=9.98, temp=0.991, critic_loss=87.8, actor_loss=-5.78]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.38 [info     ] SAC_20240202002936: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0021747589111328126, 'time_algorithm_update': 0.042484164237976074, 'temp_loss': 9.986350631713867, 'temp': 0.9894034564495087, 'critic_loss': 78.6671745300293, 'actor_loss': -6.20998067855835, 'time_step': 0.044933652877807616} step=40\n",
      "2024-02-02 00:29.38 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 22.34it/s, temp_loss=9.94, temp=0.988, critic_loss=63.9, actor_loss=-6.75]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.39 [info     ] SAC_20240202002936: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.002055811882019043, 'time_algorithm_update': 0.041579174995422366, 'temp_loss': 9.936163234710694, 'temp': 0.9864461958408356, 'critic_loss': 51.58513832092285, 'actor_loss': -7.294176816940308, 'time_step': 0.043834495544433597} step=50\n",
      "2024-02-02 00:29.39 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 22.76it/s, temp_loss=9.91, temp=0.985, critic_loss=36.6, actor_loss=-7.98]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.39 [info     ] SAC_20240202002936: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0023258686065673827, 'time_algorithm_update': 0.04079625606536865, 'temp_loss': 9.90310297012329, 'temp': 0.9835052609443664, 'critic_loss': 25.472989082336426, 'actor_loss': -8.602306938171386, 'time_step': 0.04333884716033935} step=60\n",
      "2024-02-02 00:29.39 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 21.95it/s, temp_loss=9.85, temp=0.982, critic_loss=16.7, actor_loss=-9.34]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.40 [info     ] SAC_20240202002936: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.00243682861328125, 'time_algorithm_update': 0.04188516139984131, 'temp_loss': 9.832934474945068, 'temp': 0.9805800497531891, 'critic_loss': 11.790692138671876, 'actor_loss': -9.96456413269043, 'time_step': 0.045019006729125975} step=70\n",
      "2024-02-02 00:29.40 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 23.19it/s, temp_loss=9.78, temp=0.979, critic_loss=9.04, actor_loss=-10.6]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.40 [info     ] SAC_20240202002936: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.00224456787109375, 'time_algorithm_update': 0.03993875980377197, 'temp_loss': 9.663540649414063, 'temp': 0.9776783585548401, 'critic_loss': 8.176937198638916, 'actor_loss': -11.015629959106445, 'time_step': 0.04252634048461914} step=80\n",
      "2024-02-02 00:29.40 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 18.82it/s, temp_loss=9.63, temp=0.976, critic_loss=8.52, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.41 [info     ] SAC_20240202002936: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.00236663818359375, 'time_algorithm_update': 0.04972684383392334, 'temp_loss': 9.55026626586914, 'temp': 0.9748109519481659, 'critic_loss': 8.249740362167358, 'actor_loss': -11.330779933929444, 'time_step': 0.05226366519927979} step=90\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.41 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 13.93it/s, temp_loss=9.49, temp=0.973, critic_loss=8.19, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.41 [info     ] SAC_20240202002936: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.005139684677124024, 'time_algorithm_update': 0.06480472087860108, 'temp_loss': 9.486257457733155, 'temp': 0.9719739258289337, 'critic_loss': 8.079420328140259, 'actor_loss': -11.218547821044922, 'time_step': 0.07060112953186035} step=100\n",
      "2024-02-02 00:29.41 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002936\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 5 has 303 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.43 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.43 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002943\n",
      "2024-02-02 00:29.43 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.43 [debug    ] Building models...\n",
      "2024-02-02 00:29.43 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.43 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002943\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 23.46it/s, temp_loss=10, temp=1, critic_loss=149, actor_loss=-4.01]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.44 [info     ] SAC_20240202002943: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0023166418075561525, 'time_algorithm_update': 0.03976907730102539, 'temp_loss': 10.052824115753173, 'temp': 0.9983516275882721, 'critic_loss': 144.07872467041017, 'actor_loss': -4.263494873046875, 'time_step': 0.042218255996704104} step=10\n",
      "2024-02-02 00:29.44 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 20.76it/s, temp_loss=10.1, temp=0.997, critic_loss=133, actor_loss=-4.49]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.44 [info     ] SAC_20240202002943: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.002416062355041504, 'time_algorithm_update': 0.044751429557800294, 'temp_loss': 10.052628993988037, 'temp': 0.9953600943088532, 'critic_loss': 125.71985397338867, 'actor_loss': -4.755046319961548, 'time_step': 0.04746706485748291} step=20\n",
      "2024-02-02 00:29.44 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 21.04it/s, temp_loss=10, temp=0.994, critic_loss=112, actor_loss=-5.04]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.45 [info     ] SAC_20240202002943: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0025945186614990236, 'time_algorithm_update': 0.043743467330932616, 'temp_loss': 10.023762416839599, 'temp': 0.9923787355422974, 'critic_loss': 106.25714797973633, 'actor_loss': -5.346487617492675, 'time_step': 0.0467034101486206} step=30\n",
      "2024-02-02 00:29.45 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 14.96it/s, temp_loss=10, temp=0.991, critic_loss=90.2, actor_loss=-5.78]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.45 [info     ] SAC_20240202002943: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0033086299896240234, 'time_algorithm_update': 0.06212460994720459, 'temp_loss': 9.976614284515382, 'temp': 0.9894100725650787, 'critic_loss': 78.4247833251953, 'actor_loss': -6.175245571136474, 'time_step': 0.06583213806152344} step=40\n",
      "2024-02-02 00:29.45 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 20.01it/s, temp_loss=9.99, temp=0.988, critic_loss=65, actor_loss=-6.67]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.46 [info     ] SAC_20240202002943: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0026576042175292967, 'time_algorithm_update': 0.046579742431640626, 'temp_loss': 9.945467758178712, 'temp': 0.9864565551280975, 'critic_loss': 51.41274147033691, 'actor_loss': -7.229245853424072, 'time_step': 0.04938347339630127} step=50\n",
      "2024-02-02 00:29.46 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 18.87it/s, temp_loss=9.87, temp=0.985, critic_loss=33.5, actor_loss=-7.91]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.46 [info     ] SAC_20240202002943: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0028571605682373045, 'time_algorithm_update': 0.04918975830078125, 'temp_loss': 9.876369762420655, 'temp': 0.9835182785987854, 'critic_loss': 25.448630714416502, 'actor_loss': -8.477298402786255, 'time_step': 0.05225715637207031} step=60\n",
      "2024-02-02 00:29.47 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 21.98it/s, temp_loss=9.78, temp=0.982, critic_loss=15.9, actor_loss=-9.28]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.47 [info     ] SAC_20240202002943: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0024769067764282226, 'time_algorithm_update': 0.042350244522094724, 'temp_loss': 9.749868583679199, 'temp': 0.9806007027626038, 'critic_loss': 11.265226078033447, 'actor_loss': -9.849141693115234, 'time_step': 0.04492871761322022} step=70\n",
      "2024-02-02 00:29.47 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 18.22it/s, temp_loss=9.73, temp=0.979, critic_loss=7.75, actor_loss=-10.5]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.48 [info     ] SAC_20240202002943: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.002542853355407715, 'time_algorithm_update': 0.05134358406066895, 'temp_loss': 9.554006385803223, 'temp': 0.9777131378650665, 'critic_loss': 8.001553106307984, 'actor_loss': -10.871936321258545, 'time_step': 0.054114031791687014} step=80\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.48 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 13.05it/s, temp_loss=9.53, temp=0.976, critic_loss=9.65, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.48 [info     ] SAC_20240202002943: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.002533721923828125, 'time_algorithm_update': 0.07268469333648682, 'temp_loss': 9.46200475692749, 'temp': 0.9748660027980804, 'critic_loss': 8.279413652420043, 'actor_loss': -11.157769775390625, 'time_step': 0.0756570816040039} step=90\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.48 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 17.70it/s, temp_loss=9.49, temp=0.973, critic_loss=7.67, actor_loss=-11]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.49 [info     ] SAC_20240202002943: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.002677035331726074, 'time_algorithm_update': 0.05279805660247803, 'temp_loss': 9.444074344635009, 'temp': 0.9720440089702607, 'critic_loss': 7.448064374923706, 'actor_loss': -11.020839214324951, 'time_step': 0.05586707592010498} step=100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.49 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002943\\model_100.pt\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 6 has 166 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.51 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.51 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002951\n",
      "2024-02-02 00:29.51 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.51 [debug    ] Building models...\n",
      "2024-02-02 00:29.51 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.51 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002951\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 18.77it/s, temp_loss=9.99, temp=1, critic_loss=150, actor_loss=-3.87]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.51 [info     ] SAC_20240202002951: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0021093130111694337, 'time_algorithm_update': 0.05031740665435791, 'temp_loss': 10.022274208068847, 'temp': 0.9983514726161957, 'critic_loss': 141.54844665527344, 'actor_loss': -4.156852769851684, 'time_step': 0.052625298500061035} step=10\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.51 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 14.92it/s, temp_loss=10.1, temp=0.997, critic_loss=141, actor_loss=-4.43]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.52 [info     ] SAC_20240202002951: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0030735015869140627, 'time_algorithm_update': 0.06218080520629883, 'temp_loss': 10.050845527648926, 'temp': 0.9953588426113129, 'critic_loss': 126.81186752319336, 'actor_loss': -4.7036138534545895, 'time_step': 0.06559514999389648} step=20\n",
      "2024-02-02 00:29.52 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 13.77it/s, temp_loss=10, temp=0.994, critic_loss=117, actor_loss=-5.02]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.53 [info     ] SAC_20240202002951: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0035589933395385742, 'time_algorithm_update': 0.06774775981903076, 'temp_loss': 10.02563705444336, 'temp': 0.992375648021698, 'critic_loss': 104.43798751831055, 'actor_loss': -5.353675794601441, 'time_step': 0.0715061902999878} step=30\n",
      "2024-02-02 00:29.53 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 19.92it/s, temp_loss=10, temp=0.991, critic_loss=88.2, actor_loss=-5.82]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.53 [info     ] SAC_20240202002951: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0025954961776733397, 'time_algorithm_update': 0.04693596363067627, 'temp_loss': 9.979064655303954, 'temp': 0.9894048273563385, 'critic_loss': 78.14685821533203, 'actor_loss': -6.2161589622497555, 'time_step': 0.04970824718475342} step=40\n",
      "2024-02-02 00:29.53 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 21.88it/s, temp_loss=10, temp=0.988, critic_loss=62.5, actor_loss=-6.79]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.54 [info     ] SAC_20240202002951: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0021970272064208984, 'time_algorithm_update': 0.04255940914154053, 'temp_loss': 9.950257682800293, 'temp': 0.9864488542079926, 'critic_loss': 50.519830322265626, 'actor_loss': -7.356434106826782, 'time_step': 0.04505584239959717} step=50\n",
      "2024-02-02 00:29.54 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 22.60it/s, temp_loss=9.93, temp=0.985, critic_loss=34.1, actor_loss=-8.17]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.54 [info     ] SAC_20240202002951: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.002161240577697754, 'time_algorithm_update': 0.04123919010162354, 'temp_loss': 9.885108757019044, 'temp': 0.983508563041687, 'critic_loss': 25.00277280807495, 'actor_loss': -8.771540927886964, 'time_step': 0.04365098476409912} step=60\n",
      "2024-02-02 00:29.54 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 20.04it/s, temp_loss=9.9, temp=0.982, critic_loss=16.1, actor_loss=-9.43]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.55 [info     ] SAC_20240202002951: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.002651405334472656, 'time_algorithm_update': 0.04632880687713623, 'temp_loss': 9.83948049545288, 'temp': 0.980585378408432, 'critic_loss': 10.68752088546753, 'actor_loss': -10.203827953338623, 'time_step': 0.049294781684875486} step=70\n",
      "2024-02-02 00:29.55 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 16.88it/s, temp_loss=9.71, temp=0.979, critic_loss=6.83, actor_loss=-10.9]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.55 [info     ] SAC_20240202002951: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.002885103225708008, 'time_algorithm_update': 0.05508906841278076, 'temp_loss': 9.698780918121338, 'temp': 0.9776825368404388, 'critic_loss': 7.381951999664307, 'actor_loss': -11.277550792694091, 'time_step': 0.05831112861633301} step=80\n",
      "2024-02-02 00:29.55 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 20.85it/s, temp_loss=9.63, temp=0.976, critic_loss=8.84, actor_loss=-11.6]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.56 [info     ] SAC_20240202002951: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.0021372079849243165, 'time_algorithm_update': 0.04375505447387695, 'temp_loss': 9.572057437896728, 'temp': 0.9748097002506256, 'critic_loss': 8.429814529418945, 'actor_loss': -11.544767284393311, 'time_step': 0.047200489044189456} step=90\n",
      "2024-02-02 00:29.56 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 22.51it/s, temp_loss=9.53, temp=0.973, critic_loss=7.17, actor_loss=-11.5]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.56 [info     ] SAC_20240202002951: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0022143125534057617, 'time_algorithm_update': 0.04142498970031738, 'temp_loss': 9.5564453125, 'temp': 0.9719667673110962, 'critic_loss': 7.445015287399292, 'actor_loss': -11.347489166259766, 'time_step': 0.043938827514648435} step=100\n",
      "2024-02-02 00:29.56 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002951\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 7 has 50 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:29.58 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:29.58 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202002958\n",
      "2024-02-02 00:29.58 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:29.58 [debug    ] Building models...\n",
      "2024-02-02 00:29.58 [debug    ] Models have been built.\n",
      "2024-02-02 00:29.58 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202002958\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 26.10it/s, temp_loss=10, temp=1, critic_loss=149, actor_loss=-4.08]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.59 [info     ] SAC_20240202002958: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0019131898880004883, 'time_algorithm_update': 0.035964441299438474, 'temp_loss': 10.059475135803222, 'temp': 0.9983514368534088, 'critic_loss': 143.12242126464844, 'actor_loss': -4.300635194778442, 'time_step': 0.03797733783721924} step=10\n",
      "2024-02-02 00:29.59 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 25.59it/s, temp_loss=10.1, temp=0.997, critic_loss=130, actor_loss=-4.58]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.59 [info     ] SAC_20240202002958: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0013356924057006836, 'time_algorithm_update': 0.03674550056457519, 'temp_loss': 10.057591342926026, 'temp': 0.9953599572181702, 'critic_loss': 124.3447494506836, 'actor_loss': -4.823798656463623, 'time_step': 0.038350820541381836} step=20\n",
      "2024-02-02 00:29.59 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 19.89it/s, temp_loss=9.99, temp=0.994, critic_loss=110, actor_loss=-5.16]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.59 [info     ] SAC_20240202002958: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0030847787857055664, 'time_algorithm_update': 0.046237397193908694, 'temp_loss': 10.007203578948975, 'temp': 0.9923798620700837, 'critic_loss': 102.08410186767578, 'actor_loss': -5.466353511810302, 'time_step': 0.04942255020141602} step=30\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:29.59 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 19.08it/s, temp_loss=9.99, temp=0.991, critic_loss=92.6, actor_loss=-5.92]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.00 [info     ] SAC_20240202002958: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.0026818990707397463, 'time_algorithm_update': 0.048703670501708984, 'temp_loss': 9.970911598205566, 'temp': 0.9894136607646942, 'critic_loss': 76.2709789276123, 'actor_loss': -6.3654039859771725, 'time_step': 0.051685881614685056} step=40\n",
      "2024-02-02 00:30.00 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 22.89it/s, temp_loss=9.94, temp=0.988, critic_loss=58.9, actor_loss=-6.97]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.00 [info     ] SAC_20240202002958: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.002029705047607422, 'time_algorithm_update': 0.04098329544067383, 'temp_loss': 9.920783615112304, 'temp': 0.9864615797996521, 'critic_loss': 46.57168426513672, 'actor_loss': -7.526394987106324, 'time_step': 0.04320571422576904} step=50\n",
      "2024-02-02 00:30.00 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 22.87it/s, temp_loss=9.93, temp=0.985, critic_loss=31.4, actor_loss=-8.25]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.01 [info     ] SAC_20240202002958: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.0025016307830810548, 'time_algorithm_update': 0.0403555154800415, 'temp_loss': 9.860657501220704, 'temp': 0.9835255086421967, 'critic_loss': 22.396590137481688, 'actor_loss': -8.925630187988281, 'time_step': 0.043055033683776854} step=60\n",
      "2024-02-02 00:30.01 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 19.32it/s, temp_loss=9.8, temp=0.982, critic_loss=13.2, actor_loss=-9.76]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.01 [info     ] SAC_20240202002958: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0024984121322631837, 'time_algorithm_update': 0.048567795753479005, 'temp_loss': 9.744214725494384, 'temp': 0.9806105315685272, 'critic_loss': 9.633098030090332, 'actor_loss': -10.399378871917724, 'time_step': 0.05136542320251465} step=70\n",
      "2024-02-02 00:30.01 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 23.92it/s, temp_loss=9.68, temp=0.979, critic_loss=8.22, actor_loss=-11.1]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.02 [info     ] SAC_20240202002958: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0025911331176757812, 'time_algorithm_update': 0.038262009620666504, 'temp_loss': 9.656760120391846, 'temp': 0.9777208805084229, 'critic_loss': 8.362942028045655, 'actor_loss': -11.386447429656982, 'time_step': 0.04120466709136963} step=80\n",
      "2024-02-02 00:30.02 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 23.58it/s, temp_loss=9.56, temp=0.976, critic_loss=8.79, actor_loss=-11.5]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.02 [info     ] SAC_20240202002958: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.001984691619873047, 'time_algorithm_update': 0.03960959911346436, 'temp_loss': 9.551610946655273, 'temp': 0.9748564481735229, 'critic_loss': 8.003802824020386, 'actor_loss': -11.528509044647217, 'time_step': 0.041731739044189455} step=90\n",
      "2024-02-02 00:30.02 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 20.48it/s, temp_loss=9.62, temp=0.973, critic_loss=8.89, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.03 [info     ] SAC_20240202002958: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0020397186279296877, 'time_algorithm_update': 0.04531793594360352, 'temp_loss': 9.565700340270997, 'temp': 0.9720133185386658, 'critic_loss': 7.852674102783203, 'actor_loss': -11.368317699432373, 'time_step': 0.047561049461364746} step=100\n",
      "2024-02-02 00:30.03 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202002958\\model_100.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 8 has 317 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:30.04 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:30.04 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202003004\n",
      "2024-02-02 00:30.04 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:30.04 [debug    ] Building models...\n",
      "2024-02-02 00:30.04 [debug    ] Models have been built.\n",
      "2024-02-02 00:30.04 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202003004\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 24.21it/s, temp_loss=10.1, temp=1, critic_loss=151, actor_loss=-4.03]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.05 [info     ] SAC_20240202003004: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0017461776733398438, 'time_algorithm_update': 0.038915157318115234, 'temp_loss': 10.055725193023681, 'temp': 0.9983517706394196, 'critic_loss': 142.98335876464844, 'actor_loss': -4.226669788360596, 'time_step': 0.04080872535705567} step=10\n",
      "2024-02-02 00:30.05 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 20.47it/s, temp_loss=10.1, temp=0.997, critic_loss=131, actor_loss=-4.48]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.05 [info     ] SAC_20240202003004: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0023614645004272463, 'time_algorithm_update': 0.04540805816650391, 'temp_loss': 10.058867073059082, 'temp': 0.9953611314296722, 'critic_loss': 127.62885818481445, 'actor_loss': -4.655400133132934, 'time_step': 0.048068690299987796} step=20\n",
      "2024-02-02 00:30.05 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 14.12it/s, temp_loss=10, temp=0.994, critic_loss=119, actor_loss=-4.9]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.06 [info     ] SAC_20240202003004: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.0038314104080200196, 'time_algorithm_update': 0.06544229984283448, 'temp_loss': 10.009558010101319, 'temp': 0.9923811495304108, 'critic_loss': 111.06840362548829, 'actor_loss': -5.182182264328003, 'time_step': 0.06977941989898681} step=30\n",
      "2024-02-02 00:30.06 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 19.58it/s, temp_loss=9.98, temp=0.991, critic_loss=97.9, actor_loss=-5.55]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.07 [info     ] SAC_20240202003004: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.002785205841064453, 'time_algorithm_update': 0.04712545871734619, 'temp_loss': 9.983013725280761, 'temp': 0.9894141495227814, 'critic_loss': 84.4933235168457, 'actor_loss': -5.923824262619019, 'time_step': 0.05028092861175537} step=40\n",
      "2024-02-02 00:30.07 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 19.71it/s, temp_loss=9.98, temp=0.988, critic_loss=68.1, actor_loss=-6.45]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.07 [info     ] SAC_20240202003004: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.002502703666687012, 'time_algorithm_update': 0.04720470905303955, 'temp_loss': 9.944813632965088, 'temp': 0.9864602148532867, 'critic_loss': 55.929802322387694, 'actor_loss': -6.9245952606201175, 'time_step': 0.049937105178833006} step=50\n",
      "2024-02-02 00:30.07 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 17.76it/s, temp_loss=9.88, temp=0.985, critic_loss=42.1, actor_loss=-7.55]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.08 [info     ] SAC_20240202003004: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.002549552917480469, 'time_algorithm_update': 0.052438783645629886, 'temp_loss': 9.878421211242676, 'temp': 0.9835215747356415, 'critic_loss': 30.45977325439453, 'actor_loss': -8.144804573059082, 'time_step': 0.05545644760131836} step=60\n",
      "2024-02-02 00:30.08 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 18.72it/s, temp_loss=9.85, temp=0.982, critic_loss=19.3, actor_loss=-8.79]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.08 [info     ] SAC_20240202003004: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.002642321586608887, 'time_algorithm_update': 0.04939858913421631, 'temp_loss': 9.77077522277832, 'temp': 0.9806026816368103, 'critic_loss': 12.919193172454834, 'actor_loss': -9.558074760437012, 'time_step': 0.05224008560180664} step=70\n",
      "2024-02-02 00:30.08 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 16.15it/s, temp_loss=9.65, temp=0.979, critic_loss=7.8, actor_loss=-10.3]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.09 [info     ] SAC_20240202003004: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.003204536437988281, 'time_algorithm_update': 0.05696690082550049, 'temp_loss': 9.64148645401001, 'temp': 0.9777112364768982, 'critic_loss': 7.648458957672119, 'actor_loss': -10.742943382263183, 'time_step': 0.06039762496948242} step=80\n",
      "2024-02-02 00:30.09 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 17.09it/s, temp_loss=9.51, temp=0.976, critic_loss=8.06, actor_loss=-11.2]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.10 [info     ] SAC_20240202003004: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.002746129035949707, 'time_algorithm_update': 0.05466866493225098, 'temp_loss': 9.485182094573975, 'temp': 0.9748519003391266, 'critic_loss': 8.102757978439332, 'actor_loss': -11.193818378448487, 'time_step': 0.057614254951477054} step=90\n",
      "2024-02-02 00:30.10 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 18.92it/s, temp_loss=9.45, temp=0.973, critic_loss=7.91, actor_loss=-11.1]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.10 [info     ] SAC_20240202003004: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.0029547691345214845, 'time_algorithm_update': 0.048784494400024414, 'temp_loss': 9.468217086791991, 'temp': 0.9720240652561187, 'critic_loss': 7.582648611068725, 'actor_loss': -11.038836669921874, 'time_step': 0.05193870067596436} step=100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.10 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003004\\model_100.pt\n",
      "----------------------------------------------------------------------------------------------------\n",
      "Cluster 9 has 158 trajectories out of 1996\n",
      "----------------------------------------------------------------------------------------------------\n",
      "2024-02-02 00:30.12 [debug    ] RandomIterator is selected.\n",
      "2024-02-02 00:30.12 [info     ] Directory is created at d3rlpy_logs\\SAC_20240202003012\n",
      "2024-02-02 00:30.12 [debug    ] Fitting scaler...              scaler=pixel\n",
      "2024-02-02 00:30.12 [debug    ] Building models...\n",
      "2024-02-02 00:30.12 [debug    ] Models have been built.\n",
      "2024-02-02 00:30.12 [info     ] Parameters are saved to d3rlpy_logs\\SAC_20240202003012\\params.json params={'action_scaler': None, 'actor_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'actor_learning_rate': 0.0003, 'actor_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'batch_size': 256, 'critic_encoder_factory': {'type': 'default', 'params': {'activation': 'relu', 'use_batch_norm': False, 'dropout_rate': None}}, 'critic_learning_rate': 0.0003, 'critic_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'gamma': 0.99, 'generated_maxlen': 100000, 'initial_temperature': 1.0, 'n_critics': 2, 'n_frames': 1, 'n_steps': 1, 'q_func_factory': {'type': 'mean', 'params': {'share_encoder': False}}, 'real_ratio': 1.0, 'reward_scaler': None, 'scaler': {'type': 'pixel', 'params': {}}, 'tau': 0.005, 'temp_learning_rate': 0.0003, 'temp_optim_factory': {'optim_cls': 'Adam', 'betas': (0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False}, 'use_gpu': None, 'algorithm': 'SAC', 'observation_shape': (17,), 'action_size': 6}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1/10: 100%|██████████| 10/10 [00:00<00:00, 21.97it/s, temp_loss=10, temp=1, critic_loss=148, actor_loss=-3.95]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.12 [info     ] SAC_20240202003012: epoch=1 step=10 epoch=1 metrics={'time_sample_batch': 0.0020972728729248048, 'time_algorithm_update': 0.04248499870300293, 'temp_loss': 10.06543264389038, 'temp': 0.9983515560626983, 'critic_loss': 143.24500885009766, 'actor_loss': -4.196102523803711, 'time_step': 0.04483487606048584} step=10\n",
      "2024-02-02 00:30.12 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_10.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 2/10: 100%|██████████| 10/10 [00:00<00:00, 19.00it/s, temp_loss=10.1, temp=0.997, critic_loss=132, actor_loss=-4.49]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.13 [info     ] SAC_20240202003012: epoch=2 step=20 epoch=2 metrics={'time_sample_batch': 0.0023012876510620115, 'time_algorithm_update': 0.04917621612548828, 'temp_loss': 10.058058261871338, 'temp': 0.9953608810901642, 'critic_loss': 128.03984146118165, 'actor_loss': -4.632120895385742, 'time_step': 0.051741409301757815} step=20\n",
      "2024-02-02 00:30.13 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_20.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 3/10: 100%|██████████| 10/10 [00:00<00:00, 18.08it/s, temp_loss=10, temp=0.994, critic_loss=116, actor_loss=-4.9]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.14 [info     ] SAC_20240202003012: epoch=3 step=30 epoch=3 metrics={'time_sample_batch': 0.002690410614013672, 'time_algorithm_update': 0.051563262939453125, 'temp_loss': 10.028261661529541, 'temp': 0.9923805177211762, 'critic_loss': 107.08182144165039, 'actor_loss': -5.202747488021851, 'time_step': 0.054517579078674314} step=30\n",
      "2024-02-02 00:30.14 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_30.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 4/10: 100%|██████████| 10/10 [00:00<00:00, 14.93it/s, temp_loss=10, temp=0.991, critic_loss=96.3, actor_loss=-5.59]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.14 [info     ] SAC_20240202003012: epoch=4 step=40 epoch=4 metrics={'time_sample_batch': 0.002533411979675293, 'time_algorithm_update': 0.06220877170562744, 'temp_loss': 9.977412223815918, 'temp': 0.9894126176834106, 'critic_loss': 84.08842697143555, 'actor_loss': -5.976075601577759, 'time_step': 0.06601197719573974} step=40\n",
      "2024-02-02 00:30.14 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_40.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 5/10: 100%|██████████| 10/10 [00:00<00:00, 18.13it/s, temp_loss=9.98, temp=0.988, critic_loss=70.2, actor_loss=-6.5]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.15 [info     ] SAC_20240202003012: epoch=5 step=50 epoch=5 metrics={'time_sample_batch': 0.0030699253082275392, 'time_algorithm_update': 0.050961089134216306, 'temp_loss': 9.94349422454834, 'temp': 0.9864600121974945, 'critic_loss': 54.594194412231445, 'actor_loss': -7.012632560729981, 'time_step': 0.0543302059173584} step=50\n",
      "2024-02-02 00:30.15 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_50.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 6/10: 100%|██████████| 10/10 [00:00<00:00, 19.37it/s, temp_loss=9.94, temp=0.985, critic_loss=40.8, actor_loss=-7.7]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.15 [info     ] SAC_20240202003012: epoch=6 step=60 epoch=6 metrics={'time_sample_batch': 0.002925205230712891, 'time_algorithm_update': 0.0473552942276001, 'temp_loss': 9.907973957061767, 'temp': 0.983521831035614, 'critic_loss': 29.38786563873291, 'actor_loss': -8.320685958862304, 'time_step': 0.050590682029724124} step=60\n",
      "2024-02-02 00:30.15 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_60.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 7/10: 100%|██████████| 10/10 [00:00<00:00, 18.82it/s, temp_loss=9.88, temp=0.982, critic_loss=17.4, actor_loss=-9.19]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.16 [info     ] SAC_20240202003012: epoch=7 step=70 epoch=7 metrics={'time_sample_batch': 0.0035079479217529296, 'time_algorithm_update': 0.04847991466522217, 'temp_loss': 9.835586738586425, 'temp': 0.9805988848209382, 'critic_loss': 11.601831150054931, 'actor_loss': -9.756582069396973, 'time_step': 0.05228738784790039} step=70\n",
      "2024-02-02 00:30.16 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_70.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 8/10: 100%|██████████| 10/10 [00:00<00:00, 12.86it/s, temp_loss=9.77, temp=0.979, critic_loss=7.54, actor_loss=-10.5]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.17 [info     ] SAC_20240202003012: epoch=8 step=80 epoch=8 metrics={'time_sample_batch': 0.0062053918838500975, 'time_algorithm_update': 0.06955955028533936, 'temp_loss': 9.719857215881348, 'temp': 0.9776967883110046, 'critic_loss': 7.431028604507446, 'actor_loss': -10.88741340637207, 'time_step': 0.07647914886474609} step=80\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.17 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_80.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9/10: 100%|██████████| 10/10 [00:00<00:00, 14.24it/s, temp_loss=9.69, temp=0.976, critic_loss=7.49, actor_loss=-11.4]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.18 [info     ] SAC_20240202003012: epoch=9 step=90 epoch=9 metrics={'time_sample_batch': 0.004046320915222168, 'time_algorithm_update': 0.06417675018310547, 'temp_loss': 9.616453266143798, 'temp': 0.9748219668865203, 'critic_loss': 7.413582420349121, 'actor_loss': -11.350332641601563, 'time_step': 0.06894483566284179} step=90\n",
      "2024-02-02 00:30.18 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_90.pt\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "Epoch 10/10: 100%|██████████| 10/10 [00:00<00:00, 13.71it/s, temp_loss=9.57, temp=0.973, critic_loss=8.02, actor_loss=-11.4]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2024-02-02 00:30.18 [info     ] SAC_20240202003012: epoch=10 step=100 epoch=10 metrics={'time_sample_batch': 0.002778434753417969, 'time_algorithm_update': 0.06885135173797607, 'temp_loss': 9.501240921020507, 'temp': 0.9719759047031402, 'critic_loss': 7.593591833114624, 'actor_loss': -11.255508327484131, 'time_step': 0.07207462787628174} step=100\n",
      "2024-02-02 00:30.18 [info     ] Model parameters are saved to d3rlpy_logs\\SAC_20240202003012\\model_100.pt\n"
     ]
    }
   ],
   "source": [
    "list_episodes_hc = make_episodes(final_obs_hc, final_act_hc, final_rew_hc, 6)\n",
    "sac_hc = fit_sac(list_episodes_hc, n_steps=100, n_steps_per_epoch=10, device=device)\n",
    "\n",
    "model_params_hc = {\n",
    "'actor_learning_rate': 3e-4,\n",
    "'critic_learning_rate': 3e-4,\n",
    "'temp_learning_rate': 3e-4,\n",
    "'batch_size': 256,\n",
    "'scaler': 'pixel',\n",
    "'use_gpu': True if device == 'cuda' else False\n",
    "}\n",
    "\n",
    "models_hc, result_data_combinations_hc = fit_per_cluster(\n",
    "    model=sac_hc, \n",
    "    model_class= d3rlpy.algos.SAC, \n",
    "    model_params=model_params_hc,\n",
    "    data_embedding=trajectory_embedding_halfcheetah,\n",
    "    list_episodes=list_episodes_hc, \n",
    "    clusters=clusters_halfcheetah, \n",
    "    trajectory_embedding=trajectory_embedding_halfcheetah,\n",
    "    test_observations=np.expand_dims(test_observation_hc, axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Print table 2\n",
      "3.3615303\n",
      "3.4558086\n",
      "3.4691343\n",
      "3.2958813\n",
      "3.362166\n",
      "3.3624597\n",
      "3.5280466\n",
      "3.3444352\n",
      "3.320675\n",
      "3.3745296\n",
      "3.382636\n",
      "Comparing the actions\n",
      "[0.         0.00412198 0.0037523  0.00352279 0.00192288 0.00384305\n",
      " 0.00279478 0.00163549 0.00526476 0.00283458 0.00136433]\n",
      "Avg Delta Q\n",
      "0.0\n",
      "0.09427833557128906\n",
      "0.1076040267944336\n",
      "0.06564903259277344\n",
      "0.0006356239318847656\n",
      "0.0009293556213378906\n",
      "0.16651630401611328\n",
      "0.017095088958740234\n",
      "0.04085540771484375\n",
      "0.012999296188354492\n",
      "0.02110576629638672\n",
      "Data distances\n",
      "[0.         1.         0.30475168 0.87304217 0.84830602 0.29862322\n",
      " 0.53408937 0.52455791 0.61629598 0.60397571 0.78555106]\n",
      "[0.1 0.3 0.  0.  0.6 0.  0.  0.  0.  0.  0. ]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "action_dict = {\t0: 'bthigh',\n",
    "            1: 'bshin',\n",
    "            2: 'bfoot',\n",
    "            3: 'fthigh',\n",
    "            4: 'fshin',\n",
    "            5: 'ffoot',            \n",
    "}\n",
    "#test_observation, models, traj_embeddings, clusters\n",
    "#np.expand_dims(test_observation_hc, axis=0)\n",
    "attributions_hc = trajectory_attributions_hc(np.expand_dims(test_observation_hc, axis=0), models_hc, trajectory_embedding_halfcheetah, clusters_halfcheetah)\n",
    "print(\"Print table 2\")\n",
    "print_results_hc(result_data_combinations_hc, np.expand_dims(test_observation_hc, axis=0), models_hc, attributions_hc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "factvenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
