#!/usr/bin/env bash
#
# Runs redteam_instruction_dolly.py once per seed, sequentially, on GPUs 0 and 1.
#
# Usage: <this script> [SEED ...]
#   With no arguments the seeds 2000, 3000 and 1000 are run, in that order.
#
# Every run receives the same JSON object of dotted-key overrides as its only
# argument; the only per-run values are "train.seed" and the trailing path
# component of "train.logging_dir" / "train.checkpoint_dir", all three of which
# are derived from the same seed so they cannot get out of sync.
#
# A failing run does not stop the remaining seeds. The exit status is 0 when
# every run succeeded, otherwise the status of the most recent failing run.

# Directory under which each seed gets its own sub-directory.
# NOTE(review): the last path component presumably mirrors the override values
# below (kl0.001, bleu0, ...) -- keep the two in sync when changing either one.
readonly RUN_ROOT='results/databricks_toxicity_gpt2_dolly_peft/ppo64_gpt2_kl0.001_bleu0_cossimemb0_ent0.0_textsim0_gebrish1.0_targdiv0.0'

#######################################
# Prints the JSON override object for one seed (no trailing newline).
# Globals:   RUN_ROOT (read)
# Arguments: $1 - seed; must be a non-empty string of decimal digits
# Outputs:   JSON on stdout; an error message on stderr for an invalid seed
# Returns:   0 on success, 2 if the seed is not a non-negative integer
#######################################
build_overrides() {
  local seed=${1-}

  # The seed is emitted as a bare JSON number and as a path component, so
  # anything other than digits would yield invalid JSON or an odd directory.
  case "$seed" in
    '' | *[!0-9]*)
      printf 'error: seed must be a non-negative integer, got "%s"\n' "$seed" >&2
      return 2
      ;;
  esac

  local run_dir="${RUN_ROOT}/${seed}"

  # printf re-applies '%s' to every argument, so the pieces are concatenated
  # verbatim with nothing inserted between them.
  printf '%s' \
    '{"method.init_kl_coef": 0.001, ' \
    '"method.bleu_reward_coef": 0, ' \
    '"method.cossimemb_reward_coef": 0, ' \
    '"method.ent_reward_coef": 0.0, ' \
    '"method.bleu_reward_grams": "[2, 3, 4, 5]", ' \
    '"method.textual_sim_reward_coef": 0, ' \
    '"method.target_sim_div_reward_coef": 0.0, ' \
    '"method.giberish_penalty_coef": 1.0, ' \
    '"method.giberish_model_device": "cpu", ' \
    '"method.cossimemb_model_device": "cpu", ' \
    '"train.batch_size": 64, ' \
    '"train.logging_dir": "'"${run_dir}"'", ' \
    '"train.checkpoint_dir": "'"${run_dir}"'", ' \
    '"train.seed": '"${seed}"', ' \
    '"train.minibatch_size": 8, ' \
    '"method.chunk_size": 8, ' \
    '"method.reward_model_device_offset": 1}'
}

#######################################
# Launches one run for the given seed.
# Arguments: $1 - seed
# Returns:   build_overrides' status for an invalid seed, otherwise the exit
#            status of the python3 process
#######################################
run_seed() {
  local overrides
  # Declared separately so a build_overrides failure is not masked by 'local'.
  overrides=$(build_overrides "$1") || return

  CUDA_VISIBLE_DEVICES=0,1 python3 redteam_instruction_dolly.py "$overrides"
}

#######################################
# Runs every requested seed in order, continuing past failures.
# Arguments: optional list of seeds; defaults to 2000 3000 1000
# Outputs:   one warning line on stderr per failed run
# Returns:   0 if all runs succeeded, else the status of the last failed run
#######################################
main() {
  local seed rc status=0

  if [ "$#" -eq 0 ]; then
    set -- 2000 3000 1000
  fi

  for seed in "$@"; do
    rc=0
    run_seed "$seed" || rc=$?
    if [ "$rc" -ne 0 ]; then
      printf 'warning: run for seed %s exited with status %s\n' "$seed" "$rc" >&2
      status=$rc
    fi
  done

  return "$status"
}

main "$@"