# Hydra-style defaults list: each entry picks one option from a config group
# (`group: option`). Entries are composed in the order listed.
defaults:
  - training: reproducibility
  - predictive_model: transformer_nctx33
  - generative_process: 3mess3_2tom_quantum
  # `group@package: option` selects from the `metric_tracker` group twice and
  # places each result under a different key, so the training and eval
  # trackers are configured independently.
  - metric_tracker@training_metric_tracker: full
  - metric_tracker@eval_metric_tracker: loss_only
  - activation_tracker: simple_tracker
  - logging: print_logger
  - persistence: local_pytorch
  - optimizer: pytorch_adam
  - lr_scheduler: windowed_plateau
  # `_self_` is last, so keys defined in this file are merged after the group
  # configs above and take precedence over them.
  - _self_

# Run identification, random seeds, and device selection.
experiment_name: factored_world_hypothesis
run_name: transformer_nctx33
# Two independent seeds are configured. Presumably `seed` drives general
# randomness and `weight_init_seed` only the model's initial weights —
# TODO confirm against the consuming code.
seed: 77
weight_init_seed: 7
# NOTE(review): `auto` is presumably resolved to a concrete device at runtime
# by the consumer — confirm.
device: auto

# Free-form key/value tags attached to this run.
tags:
  research_step: training
  retention: ephemeral
  # Quoted on purpose: this is the string "true", not a YAML boolean.
  # Presumably the tag consumer expects string values — confirm.
  strict: "true"

# Tags kept separately from the run-level `tags` mapping.
# NOTE(review): presumably applied at experiment scope rather than per run —
# confirm. `project` repeats the value of `experiment_name`.
experiment_tags:
  project: factored_world_hypothesis

# Settings for the visualizations produced during the run.
visualization:
  # Interval between visualization passes.
  # NOTE(review): units are presumably training steps — confirm.
  every: 10000
  cev:
    enabled: true
    # NOTE(review): confirm `Archival` is a colorscale name the plotting code
    # recognizes; it is passed through here as a plain string.
    colorscale: Archival
    # Explicit null; presumably means "no cap on components" — confirm.
    max_components: null
    show_rangeslider: true
  belief_regression:
    enabled: true
    max_samples: 10000

# Override merged on top of the `training: reproducibility` group config
# selected in the defaults list (this file is applied last via `_self_`).
# Reduced batch size for longer context.
training:
  batch_size: 4096
