# Run summarize_from_feedback_details/sft.py through `accelerate launch`,
# using the settings in deepspeed.yaml.
# --config_file is an option of the `launch` subcommand, so it has to come
# after `launch`; the top-level `accelerate` parser does not accept it.
accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/sft.py \
  --gradient_accumulation_steps 8 \
  --local_micro_batch_size 2 \
  --track

# Run summarize_from_feedback_details/reward.py through `accelerate launch`,
# using the settings in deepspeed.yaml.
# --config_file is an option of the `launch` subcommand, so it has to come
# after `launch`; the top-level `accelerate` parser does not accept it.
accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/reward.py \
  --gradient_accumulation_steps 8 \
  --local_micro_batch_size 2 \
  --track

# Run summarize_from_feedback_details/dpo.py through `accelerate launch`,
# using the settings in deepspeed.yaml.
# --config_file is an option of the `launch` subcommand, so it has to come
# after `launch`; the top-level `accelerate` parser does not accept it.
accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/dpo.py \
  --gradient_accumulation_steps 8 \
  --local_micro_batch_size 2 \
  --track

# Run summarize_from_feedback_details/pypo.py through `accelerate launch`,
# using the settings in deepspeed.yaml.
# --config_file is an option of the `launch` subcommand, so it has to come
# after `launch`; the top-level `accelerate` parser does not accept it.
# NOTE(review): the script name "pypo.py" may be a typo for "ppo.py" --
# confirm which file actually exists in summarize_from_feedback_details/.
accelerate launch --config_file deepspeed.yaml \
  summarize_from_feedback_details/pypo.py \
  --gradient_accumulation_steps 8 \
  --local_micro_batch_size 2 \
  --track
