# Code for the project "Policy Extraction to Leverage Reward-Free Data"


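Stage 1: pretrain

In the commands below, `${env}` and `${i}` are placeholders for the environment name (passed to `--env`) and the seed (passed to `--seed`); set both before running.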

```shell
python pretrain.py \
    --env "${env}" \
    --seed "${i}" \
    --save_model \
    --train_type r4 \
    --comment r4_pretrain
```

```shell
python pretrain_metaworld.py \
    --env "${env}" \
    --seed "${i}" \
    --save_model \
    --train_type r4 \
    --comment r4_pretrain
```

Stage 2: reuse

```shell
CUDA_VISIBLE_DEVICES=0 python cup.py --comment from_r4_random_reproduce --env halfcheetah-random-v0 --seed 0 --load_guidance ./TD3_BC_halfcheetah-random-v0_r4_reproduce_0
```

```shell
CUDA_VISIBLE_DEVICES=0 python pex.py --comment from_r4_medium_reproduce --env halfcheetah-medium-v0 --seed 0 --load_guidance ./TD3_BC_halfcheetah-medium-v0_r4_reproduce_0
```

```shell
CUDA_VISIBLE_DEVICES=0 python pex.py --comment from_r4_medium_reproduce --env halfcheetah-medium-v0 --seed 0 --load_guidance ./TD3_BC_halfcheetah-medium-v0_r4_reproduce_0
```

```shell
CUDA_VISIBLE_DEVICES=0 python pex_multitask.py --comment test --env hammer-v2 --seed 0 --load_guidance TD3_BC_pick-place-v2_r4_pretrain_metaworld_0 TD3_BC_reach-v2_r4_pretrain_metaworld_0 TD3_BC_push-v2_r4_pretrain_metaworld_0
```


Evaluation:

```shell
CUDA_VISIBLE_DEVICES=1 python multihead_evaluation.py --load_actor TD3_BC_halfcheetah-medium-v0_r4_reproduce_0 --comment test_medium --env halfcheetah-medium-v0 --reward_dim 256

```



