#!/usr/bin/env bash
# Launcher for run_polygraph_unsup.py evaluation runs.
#
# Every path used by the commands in this script (./configs/..., ./workdir/...,
# run_polygraph_unsup.py) is relative, so the script first moves to the parent
# of the directory it was started from.
#
# Abort if the directory change fails: otherwise the commands would run from
# the wrong location and resolve their relative paths against it.
cd ../ || {
  printf 'error: cannot change to parent directory of %s\n' "$PWD" >&2
  exit 1
}
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_truthfullqa.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScore +n_steps='[]'
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_sciq.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScore +n_steps='[]' +aggregation_func=all
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_mmlu.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=Accuracy +n_steps='[]'
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_triviaqa.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScore +n_steps='[]' +aggregation_func=all
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_coqa.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScore +n_steps='[]' +aggregation_func=all
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_samsum.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScoreInv +n_steps='[]'
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_wmt19_deen.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=Comet +n_steps='[]' +metric_thr=0.85
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_wmt14_fren.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=Comet +n_steps='[]' +metric_thr=0.85
# wait
# CUDA_VISIBLE_DEVICES=2 HYDRA_CONFIG=./configs/polygraph_eval_xsum.yaml python run_polygraph_unsup.py ignore_exceptions=False use_density_based_ue=False batch_size=1 subsample_train_dataset=100 subsample_background_train_dataset=100 subsample_eval_dataset=2000 model.path=Qwen/Qwen2.5-7B cache_path=./workdir/output_unsup_may25_final +generation_params.samples_n=5 +train_pi=False use_seq_ue=True +run_pi_baselines=False +run_baselines=True +run_supervised_baselines=False +target_train_metric=AlignScoreInv +n_steps='[]'
# wait
# Active run: medquad config, model Qwen/Qwen2.5-7B, with only CUDA device 2
# exposed to the process. The two leading assignments are passed to python as
# environment variables; everything after the script name is forwarded as
# key=value overrides (presumably Hydra override syntax, where a leading '+'
# adds a key that is not in the config file -- confirm against the configs).
# Note that subsample_eval_dataset is 1000 here, whereas the commented-out
# runs above use 2000.
CUDA_VISIBLE_DEVICES=2 \
HYDRA_CONFIG=./configs/polygraph_eval_medquad.yaml \
python run_polygraph_unsup.py \
  ignore_exceptions=False \
  use_density_based_ue=False \
  batch_size=1 \
  subsample_train_dataset=100 \
  subsample_background_train_dataset=100 \
  subsample_eval_dataset=1000 \
  model.path=Qwen/Qwen2.5-7B \
  cache_path=./workdir/output_unsup_may25_final \
  +generation_params.samples_n=5 \
  +train_pi=False \
  use_seq_ue=True \
  +run_pi_baselines=False \
  +run_baselines=True \
  +run_supervised_baselines=False \
  +target_train_metric=AlignScore \
  +n_steps='[]'

# The python command above runs in the foreground, so no background job from
# this visible part of the script is pending when `wait` is reached.
wait