Environment:
	Python: 3.6.13
	PyTorch: 1.4.0
	Torchvision: 0.5.0
	CUDA: 10.1
	CUDNN: 7603
	NumPy: 1.19.5
	PIL: 8.1.0
Args:
	algorithm: IRM
	batch_size: 8
	data: /data/GQA/MetaDataset-subpopulation-shift
	hparams: None
	hparams_seed: 0
	log_prefix: 
	num_classes: 2
	num_domains: 2
	output_dir: train_output
	save_model_every_checkpoint: False
	seed: 0
	skip_model_save: False
	workers: 4
train_dataset.samples reverse: [('cat(indoor)', 800), ('dog(outdoor)', 800), ('cat(outdoor)', 50), ('dog(indoor)', 50)]
self.domain_to_groups {0: {'cat': ['cat(indoor)'], 'dog': ['dog(indoor)']}, 1: {'cat': ['cat(outdoor)'], 'dog': ['dog(outdoor)']}}
HParams:
	batch_size: 32
	class_balanced: False
	data_augmentation: True
	irm_lambda: 100.0
	irm_penalty_anneal_iters: 500
	lr: 5e-05
	nonlinear_classifier: False
	resnet18: True
	resnet_dropout: 0.0
	weight_decay: 0.0
step_vals {'loss': 0.6995962262153625, 'nll': 0.7047405242919922, 'penalty': -0.005144292954355478}
Iteration: 0
out-of-domain val
accuracy 0.573 	 roc_auc_score 0.611
confusion_matrix
[[155 133]
 [113 175]]
classification_report
              precision    recall  f1-score   support

           0       0.58      0.54      0.56       288
           1       0.57      0.61      0.59       288

    accuracy                           0.57       576
   macro avg       0.57      0.57      0.57       576
weighted avg       0.57      0.57      0.57       576

VAL * Acc@1 57.292
 * Acc@1 57.292 Acc@5 0.000
accuracy 0.625 	 size: 144 	 dog(indoor)
accuracy 0.618 	 size: 144 	 cat(indoor)
accuracy 0.590 	 size: 144 	 dog(outdoor)
accuracy 0.458 	 size: 144 	 cat(outdoor)
step_vals {'loss': 0.7334965467453003, 'nll': 0.731080174446106, 'penalty': 0.0024163583293557167}
step_vals {'loss': 0.6744633913040161, 'nll': 0.6749624609947205, 'penalty': -0.0004990862216800451}
step_vals {'loss': 0.5902768969535828, 'nll': 0.5901395082473755, 'penalty': 0.00013737656990997493}
step_vals {'loss': 0.6487921476364136, 'nll': 0.6594982147216797, 'penalty': -0.010706066153943539}
step_vals {'loss': 0.5378211140632629, 'nll': 0.5444730520248413, 'penalty': -0.006651942618191242}
step_vals {'loss': 0.4982139468193054, 'nll': 0.48979395627975464, 'penalty': 0.008419986814260483}
step_vals {'loss': 0.3818105459213257, 'nll': 0.36419203877449036, 'penalty': 0.017618495970964432}
step_vals {'loss': 0.38431668281555176, 'nll': 0.37164342403411865, 'penalty': 0.012673258781433105}
step_vals {'loss': 0.6136762499809265, 'nll': 0.5982176661491394, 'penalty': 0.015458600595593452}
step_vals {'loss': 0.3831233084201813, 'nll': 0.3729296922683716, 'penalty': 0.010193613357841969}
step_vals {'loss': 0.6185659766197205, 'nll': 0.6009961366653442, 'penalty': 0.01756983809173107}
step_vals {'loss': 0.3813541829586029, 'nll': 0.3857802152633667, 'penalty': -0.004426018334925175}
step_vals {'loss': 0.3583952486515045, 'nll': 0.3466627597808838, 'penalty': 0.011732480488717556}
step_vals {'loss': 0.44313958287239075, 'nll': 0.4231985807418823, 'penalty': 0.019940990954637527}
step_vals {'loss': 0.4419572651386261, 'nll': 0.4825518727302551, 'penalty': -0.04059459641575813}
step_vals {'loss': 0.49981027841567993, 'nll': 0.5277993679046631, 'penalty': -0.027989082038402557}
step_vals {'loss': 0.37304437160491943, 'nll': 0.4150022864341736, 'penalty': -0.041957929730415344}
step_vals {'loss': 0.3570723235607147, 'nll': 0.3466970920562744, 'penalty': 0.010375238955020905}
step_vals {'loss': 0.36462661623954773, 'nll': 0.37208908796310425, 'penalty': -0.007462482899427414}
step_vals {'loss': 0.3470604717731476, 'nll': 0.33536672592163086, 'penalty': 0.011693747714161873}
Iteration: 20
out-of-domain val
accuracy 0.748 	 roc_auc_score 0.870
confusion_matrix
[[263  25]
 [120 168]]
classification_report
              precision    recall  f1-score   support

           0       0.69      0.91      0.78       288
           1       0.87      0.58      0.70       288

    accuracy                           0.75       576
   macro avg       0.78      0.75      0.74       576
weighted avg       0.78      0.75      0.74       576

VAL * Acc@1 74.826
 * Acc@1 74.826 Acc@5 0.000
accuracy 0.979 	 size: 144 	 cat(indoor)
accuracy 0.847 	 size: 144 	 cat(outdoor)
accuracy 0.674 	 size: 144 	 dog(outdoor)
accuracy 0.493 	 size: 144 	 dog(indoor)
step_vals {'loss': 0.3211953341960907, 'nll': 0.3453463315963745, 'penalty': -0.024151001125574112}
step_vals {'loss': 0.6716336011886597, 'nll': 0.6294914484024048, 'penalty': 0.042142145335674286}
step_vals {'loss': 0.45196202397346497, 'nll': 0.4703896641731262, 'penalty': -0.018427647650241852}
step_vals {'loss': 0.29363471269607544, 'nll': 0.29468774795532227, 'penalty': -0.001053021289408207}
step_vals {'loss': 0.43080201745033264, 'nll': 0.43225064873695374, 'penalty': -0.0014486340805888176}
step_vals {'loss': 0.38744571805000305, 'nll': 0.37553948163986206, 'penalty': 0.011906227096915245}
step_vals {'loss': 0.4281840920448303, 'nll': 0.43187153339385986, 'penalty': -0.00368743808940053}
step_vals {'loss': 0.5056167840957642, 'nll': 0.46323201060295105, 'penalty': 0.04238475114107132}
step_vals {'loss': 0.35917508602142334, 'nll': 0.36869096755981445, 'penalty': -0.009515870362520218}
step_vals {'loss': 0.2722247540950775, 'nll': 0.24904344975948334, 'penalty': 0.023181308060884476}
step_vals {'loss': 0.3676098585128784, 'nll': 0.3511282503604889, 'penalty': 0.016481606289744377}
step_vals {'loss': 0.36279386281967163, 'nll': 0.3537728786468506, 'penalty': 0.009020982310175896}
step_vals {'loss': 0.43062710762023926, 'nll': 0.44360268115997314, 'penalty': -0.012975568883121014}
step_vals {'loss': 0.2980983853340149, 'nll': 0.2929288148880005, 'penalty': 0.005169564858078957}
step_vals {'loss': 0.2685759663581848, 'nll': 0.26463836431503296, 'penalty': 0.003937605768442154}
step_vals {'loss': 0.22659620642662048, 'nll': 0.20496857166290283, 'penalty': 0.0216276403516531}
step_vals {'loss': 0.37441015243530273, 'nll': 0.3647473454475403, 'penalty': 0.009662799537181854}
step_vals {'loss': 0.2810535132884979, 'nll': 0.268299400806427, 'penalty': 0.01275410782545805}
step_vals {'loss': 0.3313727080821991, 'nll': 0.3233224153518677, 'penalty': 0.008050303906202316}
step_vals {'loss': 0.4136694669723511, 'nll': 0.4226545989513397, 'penalty': -0.008985143154859543}
Iteration: 40
out-of-domain val
accuracy 0.825 	 roc_auc_score 0.904
confusion_matrix
[[210  78]
 [ 23 265]]
classification_report
              precision    recall  f1-score   support

           0       0.90      0.73      0.81       288
           1       0.77      0.92      0.84       288

    accuracy                           0.82       576
   macro avg       0.84      0.82      0.82       576
weighted avg       0.84      0.82      0.82       576

VAL * Acc@1 82.465
 * Acc@1 82.465 Acc@5 0.000
accuracy 0.979 	 size: 144 	 dog(outdoor)
accuracy 0.861 	 size: 144 	 dog(indoor)
accuracy 0.847 	 size: 144 	 cat(indoor)
accuracy 0.611 	 size: 144 	 cat(outdoor)
step_vals {'loss': 0.3837997019290924, 'nll': 0.3789910674095154, 'penalty': 0.004808626603335142}
step_vals {'loss': 0.248114675283432, 'nll': 0.254553884267807, 'penalty': -0.006439204793423414}
step_vals {'loss': 0.2881775200366974, 'nll': 0.29398074746131897, 'penalty': -0.005803233943879604}
step_vals {'loss': 0.40445443987846375, 'nll': 0.4409671425819397, 'penalty': -0.036512695252895355}
step_vals {'loss': 0.25683215260505676, 'nll': 0.2699476182460785, 'penalty': -0.0131154740229249}
step_vals {'loss': 0.2981202006340027, 'nll': 0.30351224541664124, 'penalty': -0.00539203267544508}
step_vals {'loss': 0.21404936909675598, 'nll': 0.20167824625968933, 'penalty': 0.01237112283706665}
step_vals {'loss': 0.48445338010787964, 'nll': 0.4450209140777588, 'penalty': 0.03943246230483055}
step_vals {'loss': 0.16715648770332336, 'nll': 0.16334298253059387, 'penalty': 0.003813502611592412}
step_vals {'loss': 0.24276910722255707, 'nll': 0.2269456684589386, 'penalty': 0.015823444351553917}
step_vals {'loss': 0.3902190029621124, 'nll': 0.3996792137622833, 'penalty': -0.009460203349590302}
step_vals {'loss': 0.43280351161956787, 'nll': 0.44839829206466675, 'penalty': -0.015594790689647198}
step_vals {'loss': 0.37705734372138977, 'nll': 0.3793787956237793, 'penalty': -0.0023214437533169985}
step_vals {'loss': 0.46131226420402527, 'nll': 0.45935508608818054, 'penalty': 0.0019571837037801743}
step_vals {'loss': 0.15745383501052856, 'nll': 0.14258532226085663, 'penalty': 0.014868507161736488}
step_vals {'loss': 0.3951264023780823, 'nll': 0.4039679169654846, 'penalty': -0.008841516450047493}
step_vals {'loss': 0.27898335456848145, 'nll': 0.26848500967025757, 'penalty': 0.010498344898223877}
step_vals {'loss': 0.21856321394443512, 'nll': 0.21057447791099548, 'penalty': 0.007988735102117062}
step_vals {'loss': 0.2491857409477234, 'nll': 0.2508975863456726, 'penalty': -0.0017118491232395172}
step_vals {'loss': 0.26199808716773987, 'nll': 0.2667835056781769, 'penalty': -0.0047854166477918625}
Iteration: 60
out-of-domain val
accuracy 0.830 	 roc_auc_score 0.899
confusion_matrix
[[231  57]
 [ 41 247]]
classification_report
              precision    recall  f1-score   support

           0       0.85      0.80      0.82       288
           1       0.81      0.86      0.83       288

    accuracy                           0.83       576
   macro avg       0.83      0.83      0.83       576
weighted avg       0.83      0.83      0.83       576

VAL * Acc@1 82.986
 * Acc@1 82.986 Acc@5 0.000
accuracy 0.965 	 size: 144 	 dog(outdoor)
accuracy 0.889 	 size: 144 	 cat(indoor)
accuracy 0.750 	 size: 144 	 dog(indoor)
accuracy 0.715 	 size: 144 	 cat(outdoor)
step_vals {'loss': 0.29219815135002136, 'nll': 0.30788499116897583, 'penalty': -0.015686849132180214}
step_vals {'loss': 0.31497374176979065, 'nll': 0.3139319121837616, 'penalty': 0.0010418249294161797}
step_vals {'loss': 0.20685237646102905, 'nll': 0.18077616393566132, 'penalty': 0.026076218113303185}
step_vals {'loss': 0.15763509273529053, 'nll': 0.15134236216545105, 'penalty': 0.006292731035500765}
step_vals {'loss': 0.47955435514450073, 'nll': 0.4659791588783264, 'penalty': 0.013575202785432339}
step_vals {'loss': 0.2507096529006958, 'nll': 0.246537983417511, 'penalty': 0.004171658772975206}
step_vals {'loss': 0.2467607855796814, 'nll': 0.24397501349449158, 'penalty': 0.002785773016512394}
step_vals {'loss': 0.23065920174121857, 'nll': 0.22268745303153992, 'penalty': 0.0079717468470335}
step_vals {'loss': 0.26058173179626465, 'nll': 0.27634692192077637, 'penalty': -0.015765182673931122}
step_vals {'loss': 0.21603107452392578, 'nll': 0.20980530977249146, 'penalty': 0.00622576056048274}
step_vals {'loss': 0.16773536801338196, 'nll': 0.14940014481544495, 'penalty': 0.01833523064851761}
step_vals {'loss': 0.27091971039772034, 'nll': 0.2765777111053467, 'penalty': -0.005657999310642481}
step_vals {'loss': 0.3549903929233551, 'nll': 0.35189056396484375, 'penalty': 0.003099842695519328}
step_vals {'loss': 0.24545754492282867, 'nll': 0.2503516674041748, 'penalty': -0.0048941271379590034}
step_vals {'loss': 0.3076038360595703, 'nll': 0.31946325302124023, 'penalty': -0.011859403923153877}
step_vals {'loss': 0.16965143382549286, 'nll': 0.16318248212337494, 'penalty': 0.006468947045505047}
step_vals {'loss': 0.2578912377357483, 'nll': 0.26319026947021484, 'penalty': -0.005299046635627747}
step_vals {'loss': 0.21151363849639893, 'nll': 0.21515487134456635, 'penalty': -0.0036412281915545464}
step_vals {'loss': 0.29806646704673767, 'nll': 0.31392019987106323, 'penalty': -0.01585374027490616}
step_vals {'loss': 0.35748088359832764, 'nll': 0.35926690697669983, 'penalty': -0.0017860297812148929}
Iteration: 80
out-of-domain val
accuracy 0.823 	 roc_auc_score 0.906
confusion_matrix
[[230  58]
 [ 44 244]]
classification_report
              precision    recall  f1-score   support

           0       0.84      0.80      0.82       288
           1       0.81      0.85      0.83       288

    accuracy                           0.82       576
   macro avg       0.82      0.82      0.82       576
weighted avg       0.82      0.82      0.82       576

VAL * Acc@1 82.292
 * Acc@1 82.292 Acc@5 0.000
accuracy 0.958 	 size: 144 	 dog(outdoor)
accuracy 0.903 	 size: 144 	 cat(indoor)
accuracy 0.736 	 size: 144 	 dog(indoor)
accuracy 0.694 	 size: 144 	 cat(outdoor)
