#!/usr/bin/env bash
#
# Runs `python3 -m inference_rlhf.code.generate` once, in the foreground, for
# the qwen-25-coder-14b policy on the code_contests task, passing the
# key=value overrides listed below.
#
# Exit status: non-zero if the generation run fails. Without `set -e` the
# script's status would be that of the trailing `wait`, which is always 0 when
# there are no background jobs, so a failed run would be reported as success.
#
# Only POSIX sh constructs are used, so `sh <this file>` keeps working.
set -eu

python3 -m inference_rlhf.code.generate \
  sampling.seed=1337 \
  policy=qwen-25-coder-14b \
  sampling.num_blocks=1 \
  user=anonymousanonymous \
  task=code_contests \
  sampling.k=1000 \
  sampling.temperature=0.6 \
  sampling.top_p=0.95 \
  task.generation.generation_idx=0 \
  sampling.tensor_parallel_size=2

# Disabled variants: one background run per GPU (selected through
# CUDA_VISIBLE_DEVICES) for the llama-3-8b policy, each with its own
# generation_idx.
# NOTE(review): the last line is truncated -- it has no value for
# task.generation.generation_idx (presumably 6, following 4 and 5; confirm)
# and no trailing `&`. Fix both before re-enabling it.
# CUDA_VISIBLE_DEVICES=1 python3 -m inference_rlhf.code.generate sampling.seed=1337 policy=llama-3-8b sampling.num_blocks=1 user=anonymousanonymous task=code_contests sampling.k=1000 sampling.temperature=0.6 sampling.top_p=0.95 task.generation.generation_idx=4 &
# CUDA_VISIBLE_DEVICES=2 python3 -m inference_rlhf.code.generate sampling.seed=1337 policy=llama-3-8b sampling.num_blocks=1 user=anonymousanonymous task=code_contests sampling.k=1000 sampling.temperature=0.6 sampling.top_p=0.95 task.generation.generation_idx=5 &
# CUDA_VISIBLE_DEVICES=3 python3 -m inference_rlhf.code.generate sampling.seed=1337 policy=llama-3-8b sampling.num_blocks=1 user=anonymousanonymous task=code_contests sampling.k=1000 sampling.temperature=0.6 sampling.top_p=0.95 task.generation.generation_idx

# Barrier for the background runs above once they are re-enabled; with only the
# foreground run it returns immediately. A bare `wait` returns 0 no matter how
# the jobs exited, so record each `$!` and `wait "$pid"` individually if their
# failures must be detected.
wait