ASHA_Time_tb_ASHA_cheap_svhn_Adam_Adam
training_loss: 0.183522
test_loss: 0.550373
validation_loss: 0.511474
{'adam_one_minus_b1': 0.1262533228048332, 'adam_one_minus_b2': 0.02532913796884084, 'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'learning_rate': 0.001, 'name': 'adam'}, 'run_group_name': 'tb_ASHA_cheap_svhn_Adam', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.eps': 3.3548604705643876e-06, 'opt.learning_rate': 0.0018361521605440613, 'root.batch_size': 1600}
ASHA_Time_tb_ASHA_cheap_svhn_KFACDeepMind_KFACDeepMind
training_loss: 0.023456
test_loss: 0.672046
validation_loss: 0.610102
{'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'curvature_ema': 0.95, 'damping_adaptation_interval': 1, 'initial_damping': 1.0, 'inverse_update_period': 1, 'l2_reg': 0, 'name': 'kfac_jax', 'use_adaptive_damping': True, 'use_adaptive_learning_rate': True, 'use_adaptive_momentum': True}, 'run_group_name': 'tb_ASHA_cheap_svhn_KFACDeepMind', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.5760476190459567, 'root.batch_size': 1600}
ASHA_Time_tb_ASHA_cheap_svhn_KFACKazuki_KFACKazuki
training_loss: 0.138403
test_loss: 0.724875
validation_loss: 0.665415
{'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'curvature_ema': 0, 'initial_damping': 1e-05, 'inverse_update_period': 1, 'l2_reg': 0, 'learning_rate': 0.1, 'min_damping': 0, 'momentum': 0.9, 'name': 'kfac_jax', 'num_burnin_steps': 0, 'use_adaptive_damping': False, 'use_adaptive_learning_rate': False, 'use_adaptive_momentum': False}, 'run_group_name': 'tb_ASHA_cheap_svhn_KFACKazuki', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.00037934102248669106, 'opt.learning_rate': 0.008828245252202133, 'opt.momentum': 0.22213346653123212, 'root.batch_size': 800}
ASHA_Time_tb_ASHA_cheap_svhn_OursAdaptive_OursAdaptive
training_loss: 0.447874
test_loss: 0.710470
validation_loss: 0.648682
{'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'acceleration_kwargs': {'inverse_func': 'samelson', 'modifier': 'sablonniere'}, 'acceleration_order': 1, 'acceleration_type': 'shanks', 'adaptive_update': True, 'damping_min': 1e-08, 'hessian_damping_factor': 0.9, 'initial_damping': 1, 'initial_scale_factor': 100, 'name': 'HessianSeriesOptimiser', 'num_update_steps': 20, 'scale_factor_method': 'lower_bound', 'scale_factor_multiplier': 1.0, 'series_name': 'damped'}, 'run_group_name': 'tb_ASHA_cheap_svhn_OursAdaptive', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 4.138327922567827e-07, 'root.batch_size': 100, 'shanks_acceleration_dict': {'acceleration_order': 2, 'num_update_steps': 5}}
ASHA_Time_tb_ASHA_cheap_svhn_OursDampedUnadaptive_OursDampedUnadaptive
training_loss: 0.090128
test_loss: 0.595463
validation_loss: 0.520379
{'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'acceleration_kwargs': {'inverse_func': 'samelson', 'modifier': 'sablonniere'}, 'acceleration_order': 1, 'acceleration_type': 'shanks', 'initial_damping': 0.0001, 'initial_scale_factor': 100, 'learning_rate': 1.0, 'momentum': 0.5, 'name': 'HessianSeriesOptimiser', 'num_update_steps': 20, 'scale_factor_method': 'lower_bound', 'scale_factor_multiplier': 1.0, 'series_name': 'damped'}, 'run_group_name': 'tb_ASHA_cheap_svhn_OursDampedUnadaptive', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.000187469338332564, 'opt.learning_rate': 0.10813907778174993, 'opt.momentum': 0.32734397184641223, 'root.batch_size': 200, 'shanks_acceleration_dict': {'acceleration_order': 0, 'num_update_steps': 1}}
ASHA_Time_tb_ASHA_cheap_svhn_SGD_SGD
training_loss: 0.229699
test_loss: 0.486794
validation_loss: 0.446993
{'master_config': {'batch_size': 256, 'dataset': {'name': 'SVHN', 'normalise_inputs': True, 'normalise_outputs': False, 'validation_proportion': 0.1666666667}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 10, 'optimiser': {'learning_rate': 0.1, 'name': 'sgd'}, 'run_group_name': 'tb_ASHA_cheap_svhn_SGD', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.learning_rate': 0.013319997431882008, 'root.batch_size': 50}
