variant,model_name,train_examples,eval_examples,counterfactual_examples,lambda_cons,final_lm_loss,final_cons_loss,gen_claim_acc,cls_claim_acc,cfact_cls_follows_swap,cfact_cls_follows_orig,minimal_pair_flip_acc,shuffled_cls_acc,wrong_span_cls_acc,patch_rationale_effect,patch_theorem_effect,patch_random_effect,patch_rationale_minus_random,head_patch_rationale_effect,head_patch_theorem_effect,head_patch_random_effect,head_patch_rationale_minus_random
lm_only,gpt2,1000,200,200,0.0,0.14203542739152908,0.8299759232997894,0.51,0.49,0.495,0.495,0.0,,,6.085674543857574,0.0,0.6761012859642506,5.4095732578933236,0.1858188410960138,0.0,0.18668433199077844,-0.0008654908947646478
no_consistency_loss,gpt2,1000,200,200,0.0,0.1584840026497841,1.0527414178848267,0.525,0.51,0.505,0.505,0.0,,,3.7180755519866944,0.0,-1.3784735988378525,5.096549150824547,0.0013430240303277969,0.0,-0.06277437956631184,0.06411740359663963
rationale_only,gpt2,1000,200,200,0.5,0.15680942684412003,0.0002023233783984324,0.64,1.0,1.0,0.01,1.0,,,5.253713107258082,0.0,-4.520738381432369,9.77445148869045,11.583938789844513,0.0,7.066421710319817,4.517517079524696
full_sequence,gpt2,1000,200,200,0.5,0.13825152188539505,0.000615099166170694,0.99,1.0,0.995,0.005,1.0,,,9.151794266819953,0.0,3.688784818828106,5.463009447991848,7.867002557277679,0.0,7.642872409034521,0.2241301482431588
proof_only,gpt2,1000,200,200,0.5,0.16725767374038697,0.00011841919385915389,0.915,1.0,0.01,1.0,1.0,,,1.409820587158203,0.0,6.617573793232441,-5.207753206074238,0.0,0.0,3.7575596159100533,-3.7575596159100533
random_consistency,gpt2,1000,200,200,0.5,0.1634265649318695,0.7701328659057617,0.775,0.41,0.4,0.59,0.07,0.41,,6.134566604614258,0.0,13.628279208183288,-7.493712603569031,-0.05438706945814192,0.0,-0.07892605549655855,0.02453898603841663
wrong_span,gpt2,1000,200,200,0.5,0.17943918108940124,0.6895494174957275,0.54,0.52,0.495,0.515,0.0,,0.52,9.574438983142375,0.0,-1.1668472954034805,10.741286278545855,0.0,0.0,0.013756288602948188,-0.013756288602948188
