ASHA_Time_tb_ASHA_cheap_cifar-10_Adam_Adam
training_loss: 1.400323
test_loss: 1.435618
validation_loss: 1.450168
{'adam_one_minus_b1': 0.002655117842014758, 'adam_one_minus_b2': 0.00010025076087166564, 'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'learning_rate': 0.001, 'name': 'adam'}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_Adam', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.eps': 0.0003613837512492487, 'opt.learning_rate': 7.70367560950785e-05, 'root.batch_size': 800}
ASHA_Time_tb_ASHA_cheap_cifar-10_KFACDeepMind_KFACDeepMind
training_loss: 1.193313
test_loss: 1.344628
validation_loss: 1.304051
{'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'curvature_ema': 0.95, 'damping_adaptation_interval': 1, 'initial_damping': 1.0, 'inverse_update_period': 1, 'l2_reg': 0, 'name': 'kfac_jax', 'use_adaptive_damping': True, 'use_adaptive_learning_rate': True, 'use_adaptive_momentum': True}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_KFACDeepMind', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.21659276269616204, 'root.batch_size': 800}
ASHA_Time_tb_ASHA_cheap_cifar-10_KFACKazuki_KFACKazuki
training_loss: 0.900991
test_loss: 1.608021
validation_loss: 1.571907
{'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'curvature_ema': 0, 'initial_damping': 1e-05, 'inverse_update_period': 1, 'l2_reg': 0, 'learning_rate': 0.1, 'min_damping': 0, 'momentum': 0.9, 'name': 'kfac_jax', 'num_burnin_steps': 0, 'use_adaptive_damping': False, 'use_adaptive_learning_rate': False, 'use_adaptive_momentum': False}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_KFACKazuki', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.0022138478629226102, 'opt.learning_rate': 0.022898413820153875, 'opt.momentum': 0.006704954461199334, 'root.batch_size': 3200}
ASHA_Time_tb_ASHA_cheap_cifar-10_OursAdaptive_OursAdaptive
training_loss: 0.891253
test_loss: 1.560586
validation_loss: 1.554743
{'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'acceleration_kwargs': {'inverse_func': 'samelson', 'modifier': 'sablonniere'}, 'acceleration_order': 1, 'acceleration_type': 'shanks', 'adaptive_update': True, 'damping_min': 1e-08, 'hessian_damping_factor': 0.9, 'initial_damping': 1, 'initial_scale_factor': 100, 'name': 'HessianSeriesOptimiser', 'num_update_steps': 20, 'scale_factor_method': 'lower_bound', 'scale_factor_multiplier': 1.0, 'series_name': 'damped'}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_OursAdaptive', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.00010366071837176235, 'root.batch_size': 3200, 'shanks_acceleration_dict': {'acceleration_order': 2, 'num_update_steps': 5}}
ASHA_Time_tb_ASHA_cheap_cifar-10_OursDampedUnadaptive_OursDampedUnadaptive
training_loss: 1.373798
test_loss: 1.512480
validation_loss: 1.523573
{'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'acceleration_kwargs': {'inverse_func': 'samelson', 'modifier': 'sablonniere'}, 'acceleration_order': 1, 'acceleration_type': 'shanks', 'initial_damping': 0.0001, 'initial_scale_factor': 100, 'learning_rate': 1.0, 'momentum': 0.5, 'name': 'HessianSeriesOptimiser', 'num_update_steps': 20, 'scale_factor_method': 'lower_bound', 'scale_factor_multiplier': 1.0, 'series_name': 'damped'}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_OursDampedUnadaptive', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.initial_damping': 0.002444657699707894, 'opt.learning_rate': 0.5620074744078232, 'opt.momentum': 0.1151915637937693, 'root.batch_size': 3200, 'shanks_acceleration_dict': {'acceleration_order': 0, 'num_update_steps': 2}}
ASHA_Time_tb_ASHA_cheap_cifar-10_SGD_SGD
training_loss: 1.006781
test_loss: 1.571846
validation_loss: 1.555225
{'master_config': {'batch_size': 100, 'dataset': {'name': 'CIFAR10', 'normalise_inputs': False, 'normalise_outputs': False, 'validation_proportion': 0.1}, 'device': None, 'forward_pass_extra_kwargs': ['is_training'], 'load_state': None, 'log_root': './runs', 'loss': {'name': 'cross_entropy_loss', 'num_classes': 10}, 'model': {'name': 'ResNet18', 'num_classes': 10}, 'num_epochs': 72, 'optimiser': {'learning_rate': 0.1, 'name': 'sgd'}, 'run_group_name': 'tb_ASHA_cheap_cifar-10_SGD', 'run_name': None, 'save_state': None, 'seed': None, 'validation_proportion': 0}, 'opt.learning_rate': 0.0041140162885593655, 'root.batch_size': 3200}
