# Launch the reward-scaling-lll.py Streamlit app twice with the same scored
# samples and the same --model-A coefficients file; only --model-B differs
# (plain test coefficients vs. the "lll_relabeled" test coefficients).
#
# The script's own options are placed after a bare `--`. Streamlit's CLI
# otherwise tries to interpret them as options to `streamlit run` itself
# instead of forwarding them to the script.
#
# The commands run sequentially: the second one starts only after the first
# Streamlit process has exited.

# Run 1: --model-B is the test-split coefficients file.
streamlit run reward-scaling-lll.py -- \
  --scored-samples 'anonymous/hh-generated_flan_t5_large_with_features2' \
  --model-A '../mle-train/out/hh-rlhf_with_features_flan_t5_large_train_coefficients.pkl' \
  --model-B '../mle-train/out/hh-rlhf_with_features_flan_t5_large_test_coefficients.pkl'

# Run 2: --model-B is the lll_relabeled test-split coefficients file.
streamlit run reward-scaling-lll.py -- \
  --scored-samples 'anonymous/hh-generated_flan_t5_large_with_features2' \
  --model-A '../mle-train/out/hh-rlhf_with_features_flan_t5_large_train_coefficients.pkl' \
  --model-B '../mle-train/out/hh-rlhf_with_features_flan_t5_large_lll_relabeled_test_coefficients.pkl'
