#!/usr/bin/env python

import gym
import d3rlpy

# Settings for the random-policy data collection run.
SEED = 100
ENV_ID = 'CartPole-v0'
BUFFER_CAPACITY = 10 ** 6   # maximum number of entries kept in the buffer
COLLECT_STEPS = 10 ** 5     # number of environment steps to collect
OUTPUT_PATH = 'cartpole_random.h5'

# seed d3rlpy before any other object is created
d3rlpy.seed(SEED)

# environment the policy interacts with
env = gym.make(ENV_ID)

# random policy for a discrete action space
policy = d3rlpy.algos.DiscreteRandomPolicy()

# replay buffer that receives the collected experience
buffer = d3rlpy.online.buffers.ReplayBuffer(
    env=env,
    maxlen=BUFFER_CAPACITY,
)

# run the policy in the environment and fill the buffer
# (this script only gathers data; nothing is fitted here)
policy.collect(env, buffer=buffer, n_steps=COLLECT_STEPS)

# convert the buffer contents into an MDPDataset
dataset = buffer.to_mdp_dataset()

# write the MDPDataset to disk
dataset.dump(OUTPUT_PATH)
