.gitignore
.pre-commit-config.yaml
LICENSE
README.md
pyproject.toml
configs/accelerate_config.yaml
configs/accelerate_config_simple.yaml
configs/config.yaml
configs/config_rl.yaml
configs/config_rm.yaml
configs/deepspeed_rl.json
configs/ppo_config.yaml
runs/.gitignore
src/__init__.py
src/bon/README.md
src/bon/__init__.py
src/bon/bon_sampling.py
src/bon/ensemble_rm.py
src/bon/run_bon_ensembles.py
src/bon/run_bon_pipeline.py
src/bon/utils.py
src/data_utils/README.md
src/data_utils/__init__.py
src/data_utils/rm_dataset_formatter.py
src/data_utils/oa_custom_datasets/__init__.py
src/data_utils/oa_custom_datasets/dataset_loader.py
src/data_utils/oa_custom_datasets/get_dataset_patch.py
src/data_utils/oa_custom_datasets/rank_datasets.py
src/llm_optimization.egg-info/PKG-INFO
src/llm_optimization.egg-info/SOURCES.txt
src/llm_optimization.egg-info/dependency_links.txt
src/llm_optimization.egg-info/requires.txt
src/llm_optimization.egg-info/top_level.txt
src/ppo/README.md
src/ppo/__init__.py
src/ppo/custom_helpers.py
src/ppo/run_ppo_gold_eval.py
src/ppo/trainer_rl.py
src/ppo/custom_trlx_trainers/__init__.py
src/ppo/custom_trlx_trainers/custom_accelerate_base_trainer.py
src/ppo/custom_trlx_trainers/custom_accelerate_ppo_trainer.py
src/reward_modeling/scoring/__init__.py
src/reward_modeling/scoring/ppo_reward_functions.py
src/reward_modeling/scoring/score.py
src/reward_modeling/scoring/score_true.py
src/reward_modeling/training/README.md
src/reward_modeling/training/__init__.py
src/reward_modeling/training/load_rm.py
src/reward_modeling/training/save_datasets.py
src/reward_modeling/training/trainer_rm.py
src/sft/__init__.py
src/sft/trainer_sft.py
src/utils/merge_seeds_script.py
src/utils/merge_utils.py