import d3rlpy
import torch

# Pretrain a CQL policy on the d3rlpy pendulum dataset and save it to disk.
# Run this code once.
dataset, env = d3rlpy.datasets.get_pendulum()

# 1. Policy Training
# Set up the CQL algorithm with its default configuration.
# NOTE(review): device=None presumably falls back to the CPU -- confirm
# against the installed d3rlpy version.
cql = d3rlpy.algos.CQLConfig().create(device=None)

# Start training.
cql.fit(
    dataset,
    n_steps=1000,  # total number of training steps
    n_steps_per_epoch=10,  # steps grouped into one epoch
    evaluators={
        # Evaluate the policy with the pendulum environment.
        'environment': d3rlpy.metrics.EnvironmentEvaluator(env),
    },
)

# Persist the trained algorithm; a plain string is used because the file name
# contains no interpolated values.
cql.save("cql_pretrained.d3")
