[ ('config_version', 5),
  # This file is a single Python-literal list of (name, value) tuples,
  # grouped by the section comments below.
  # NOTE(review): the long repeating-digit floats look machine-generated
  # (e.g. sampled by a tuner) -- keep them verbatim; confirm before
  # hand-editing.
  # NOTE(review): -1 / -1.0 recur across unrelated keys and presumably mean
  # "unset / disabled / derive automatically" -- confirm against the consumer.
  # data
  ('file_encoding', 'utf-8'),
  ('word_based', True),
  ('episodic', False),
  # model
  ('num_params', -1),
  ('share_input_and_output_embeddings', False),
  ('input_embedding_size', 200),
  ('output_embedding_size', -1),
  ('input_embedding_ratio', 1.0),
  ('output_embedding_ratio', -1.0),
  ('mos_num_components', 0),
  ('token_dropout', 0.0),
  ('embedding_dropout', 0.0),
  ('input_dropout', 0.047444744474447444),
  ('output_dropout', 0.19081908190819083),
  ('downprojected_output_dropout', -1.0),
  ('shared_mask_dropout', False),
  ('embed_once', True),
  # cell
  ('model', 'lstm'),
  ('num_layers', 1),
  ('residual_connections', False),
  ('lstm_skip_connection', False),
  ('feature_mask_rounds', 0),
  ('feature_mask_rank', 0),
  ('feature_mask', False),
  ('sparsity_ratio', -1.0),
  ('overlay_rank', -1),
  # List-valued: one entry here, alongside num_layers = 1.
  # Presumably one size per layer -- TODO confirm.
  ('hidden_size', [1024]),
  ('hidden_size_multiplier', 1.0),
  ('layer_norm', False),
  # Stored as a string, not a callable; presumably resolved to the
  # TensorFlow op by whatever reads this file -- TODO confirm.
  ('activation_fn', 'tf.tanh'),
  ('tie_forget_and_input_gates', False),
  ('cap_input_gate', False),
  ('trainable_initial_state', False),
  ('inter_layer_dropout', 0.3801980198019802),
  ('state_dropout', 0.1854985498549855),
  ('state_dropout_flip_rate', 0.0),
  ('update_dropout', 0.0),
  ('cell_clip', -1.0),
  # objective
  ('model_average', 'arithmetic'),
  ('num_training_samples', 1),
  ('l2_penalty', 0.00026324632463246324),
  ('l1_penalty', 0.0),
  ('activation_norm_penalty', 0.0),
  ('drop_state_probability', 0.0),
  # initialization
  ('embedding_init_factor', 1.0),
  ('scale_input_embeddings', False),
  ('cell_init_factor', 1.0),
  ('forget_bias', 1.0),
  ('output_init_factor', 1.0),
  # schedule
  # steps_per_turn and print_training_stats_every_num_steps are both 200 here.
  ('steps_per_turn', 200),
  ('print_training_stats_every_num_steps', 200),
  ('turns', 1000),
  # optimization
  # optimizer_type is 'adam'; the rmsprop_* keys are presumably unused with
  # this choice -- TODO confirm.
  ('optimizer_type', 'adam'),
  ('rmsprop_beta2', 0.999),
  ('rmsprop_epsilon', 1e-08),
  ('adam_beta1', 0.9),
  ('adam_beta2', 0.999),
  ('adam_epsilon', 1e-08),
  ('batch_size', 128),
  ('accum_batch_size', -1),
  ('max_grad_norm', 1.0),
  ('max_time_steps', 150),
  ('trigger_averaging_turns', 25),
  ('trigger_averaging_at_the_latest', 400),
  # learning rate
  # Decay and drop multiplier are both 1.0 and the drop_* turn keys are -1;
  # presumably the learning rate stays constant -- TODO confirm.
  ('learning_rate', 0.0006582658265826583),
  ('learning_rate_decay', 1.0),
  ('learning_rate_decay_burn_in_steps', 0),
  ('drop_learning_rate_turns', -1),
  ('drop_learning_rate_multiplier', 1.0),
  ('drop_learning_rate_at_the_latest', -1),
  # early stopping
  # early_stopping_turns is -1 and the xe target is the empty string;
  # presumably early stopping is off -- TODO confirm.
  ('early_stopping_turns', -1),
  ('early_stopping_rampup_turns', 0),
  ('early_stopping_worst_xe_target', ''),
  ('early_stopping_slowest_rate', 0.0),
  # cross-validation
  # crossvalidate is False; the folds/rounds values below are presumably
  # ignored in that case -- TODO confirm.
  ('crossvalidate', False),
  ('crossvalidation_folds', 10),
  ('crossvalidation_rounds', 1),
  # evaluation
  ('max_training_eval_batches', 20),
  ('max_eval_eval_batches', -1),
  ('max_test_eval_batches', -1),
  ('min_non_episodic_eval_examples_per_stripe', 100),
  ('eval_on_test', False),
  ('eval_method', 'deterministic'),
  ('num_eval_samples', 0),
  # NOTE(review): the temperature is negative. A literal negative softmax
  # temperature is not meaningful, so the sign presumably encodes a special
  # mode in the consumer -- confirm before changing it.
  ('eval_softmax_temperature', -0.8),
  ('eval_power_mean_power', 1.0),
  ('eval_dropout_multiplier', 1.0),
  # experiments
  # (no keys are listed under this section in this file)
  # checkpoints
  ('save_checkpoints', True),
  # misc
  ('seed', 1),
  ('swap_memory', False),
  ('log_device_placement', False),
  ('summary_flush_secs', 120),
]
