#!/usr/bin/env bash
#
# Runs the unsupervised polygraph baselines (run_polygraph_unsup.py) for
# Llama-3.1-8B and Falcon3-10B-Base over a fixed list of evaluation datasets.
#
# Usage: start the script from a directory one level below the one that
# contains run_polygraph_unsup.py and ./configs (the script does `cd ../`).
#
# Runs execute strictly one after another in the foreground, so no `wait` is
# needed between them. A failing run does not stop the remaining ones; it is
# reported on stderr and counted instead.
#
# Only constructs that also work under dash/ash are used (no arrays, no `[[`),
# so `sh <script>` keeps working.

# Abort early if the working directory cannot be changed: every relative path
# below (./configs, ./workdir, run_polygraph_unsup.py) depends on it.
cd ../ || exit 1

# Number of runs that returned a non-zero status so far.
failed_runs=0

#######################################
# Launch one run_polygraph_unsup.py evaluation.
# Globals:   failed_runs (incremented when the run fails)
# Arguments: $1 - value for model.path
#            $2 - one extra override placed right after model.path,
#                 or '' for none
#            $3 - dataset name; selects ./configs/polygraph_eval_<name>.yaml
#            $4 - value for +target_train_metric
#            $5 - value for subsample_eval_dataset
#            $6.. - further overrides appended at the end of the command line
# Outputs:   a warning on stderr when the run fails
# Returns:   exit status of the python process
#######################################
run_eval() {
  local model model_opt dataset metric eval_n status
  model="$1"
  model_opt="$2"
  dataset="$3"
  metric="$4"
  eval_n="$5"
  status=0
  shift 5

  # ${model_opt:+...} expands to nothing when no model-specific override is
  # given, so the argument order matches the hand-written command lines.
  CUDA_VISIBLE_DEVICES=0 \
  HYDRA_CONFIG="./configs/polygraph_eval_${dataset}.yaml" \
    python run_polygraph_unsup.py \
      ignore_exceptions=False \
      use_density_based_ue=False \
      batch_size=1 \
      subsample_train_dataset=100 \
      subsample_background_train_dataset=100 \
      "subsample_eval_dataset=${eval_n}" \
      "model.path=${model}" \
      ${model_opt:+"$model_opt"} \
      cache_path=./workdir/output_unsup_may25_final \
      +generation_params.samples_n=5 \
      +train_pi=False \
      use_seq_ue=True \
      +run_pi_baselines=False \
      +run_baselines=True \
      +run_supervised_baselines=False \
      "+target_train_metric=${metric}" \
      +n_steps='[]' \
      "$@" || status=$?

  if [ "$status" -ne 0 ]; then
    printf 'WARNING: run failed (status %s): model=%s dataset=%s\n' \
      "$status" "$model" "$dataset" >&2
    failed_runs=$((failed_runs + 1))
  fi
  return "$status"
}

# Llama-3.1-8B runs add +model.attn_implementation=eager.
# Arguments: DATASET METRIC EVAL_SUBSAMPLE [extra overrides...]
run_llama() {
  run_eval meta-llama/Llama-3.1-8B +model.attn_implementation=eager "$@"
}

# Falcon3-10B-Base runs pass no attention-implementation override.
# Arguments: DATASET METRIC EVAL_SUBSAMPLE [extra overrides...]
run_falcon() {
  run_eval tiiuae/Falcon3-10B-Base '' "$@"
}

# --- meta-llama/Llama-3.1-8B -------------------------------------------------
run_llama truthfullqa AlignScore    2000
run_llama sciq        AlignScore    2000 +aggregation_func=all
run_llama mmlu        Accuracy      2000
run_llama triviaqa    AlignScore    2000 +aggregation_func=all
run_llama coqa        AlignScore    2000 +aggregation_func=all
run_llama samsum      AlignScoreInv 2000
run_llama wmt19_deen  Comet         2000 +metric_thr=0.85
run_llama wmt14_fren  Comet         2000 +metric_thr=0.85
run_llama medquad     AlignScore    2000
run_llama xsum        AlignScoreInv 2000

# --- tiiuae/Falcon3-10B-Base -------------------------------------------------
run_falcon truthfullqa AlignScore    2000
run_falcon sciq        AlignScore    2000 +aggregation_func=all
run_falcon mmlu        Accuracy      2000
run_falcon triviaqa    AlignScore    2000 +aggregation_func=all
run_falcon coqa        AlignScore    2000 +aggregation_func=all
run_falcon samsum      AlignScoreInv 2000
# NOTE(review): this is the only run with 1000 eval samples instead of 2000;
# kept as found -- confirm it is intentional.
run_falcon medquad     AlignScore    1000

# Disabled Falcon runs (were commented out; the reason is not recorded here).
# run_falcon wmt19_deen Comet         2000 +metric_thr=0.85
# run_falcon wmt14_fren Comet         2000 +metric_thr=0.85
# run_falcon xsum       AlignScoreInv 2000

# Summarise failures on stderr. The script's exit status is intentionally left
# at 0, as before, so existing wrappers that chain on it behave the same.
if [ "$failed_runs" -gt 0 ]; then
  printf 'WARNING: %s run(s) failed; see messages above\n' "$failed_runs" >&2
fi