CONFIG
├── train
│   └── seed: 2222                                                                                                        
│       interval: step                                                                                                    
│       monitor: val/next_step_sharpe_ratio_positive                                                                      
│       mode: max                                                                                                         
│       ema: 0.0                                                                                                          
│       test: false                                                                                                       
│       debug: false                                                                                                      
│       ignore_warnings: false                                                                                            
│       state:                                                                                                            
│         mode: null                                                                                                      
│         n_context: 0                                                                                                    
│         n_context_eval: 0                                                                                               
│       ckpt: null                                                                                                        
│       disable_dataset: false                                                                                            
│       validate_at_start: false                                                                                          
│       pretrained_model_path: /home/epsteine/safari-dev/outputs/outputs/2025-04-27/1998_mha/last.ckpt                    
│       pretrained_model_strict_load: true                                                                                
│       pretrained_model_state_hook:                                                                                      
│         _name_: null                                                                                                    
│       post_init_hook:                                                                                                   
│         _name_: null                                                                                                    
│       layer_decay:                                                                                                      
│         _name_: null                                                                                                    
│         decay: 0.7                                                                                                      
│                                                                                                                         
├── tolerance
│   └── logdir: ./resume                                                                                                  
│       id: null                                                                                                          
│                                                                                                                         
├── wandb
│   └── project: equities-set-seq-model                                                                                   
│       group: ''                                                                                                         
│       job_type: training                                                                                                
│       mode: online                                                                                                      
│       name: equities_transformer                                                                                        
│       save_dir: .                                                                                                       
│       id: equities_transformer                                                                                          
│                                                                                                                         
├── trainer
│   └── _target_: pytorch_lightning.Trainer                                                                               
│       devices: 1                                                                                                        
│       accelerator: gpu                                                                                                  
│       accumulate_grad_batches: 1                                                                                        
│       max_epochs: 30                                                                                                    
│       gradient_clip_val: 0.0                                                                                            
│       log_every_n_steps: 10                                                                                             
│       limit_train_batches: 1.0                                                                                          
│       limit_val_batches: 1.0                                                                                            
│                                                                                                                         
├── loader
│   └── batch_size: 1                                                                                                     
│       num_workers: 0                                                                                                    
│       pin_memory: false                                                                                                 
│       drop_last: true                                                                                                   
│                                                                                                                         
├── dataset
│   └── _name_: equities_dataset                                                                                          
│       C: 10                                                                                                             
│       A: 2                                                                                                              
│       alpha_p: 0.05                                                                                                     
│       beta_p: 0.6                                                                                                       
│       alpha_sp: 0.15                                                                                                    
│       beta_sp: 0.6                                                                                                      
│       num_samples: 20                                                                                                   
│       kappa_p: 0.8                                                                                                      
│       kappa_sp: 0.6                                                                                                     
│       num_seq: 100                                                                                                      
│       val_split: 0.1                                                                                                    
│       test_split: 0.1                                                                                                   
│       dataset_name: equities_dataset                                                                                    
│       debug: true                                                                                                       
│       debug_data_size: 3                                                                                                
│       seed: 42                                                                                                          
│       num_states: 77                                                                                                    
│       dataset_config:                                                                                                   
│         daily_price_data_path: /share/data/llm_mortgages/equities_data/data/bllp/daily_price_data_n_equities_4000.npz   
│         train_beg_date: 20110101                                                                                        
│         train_end_date: 20181231                                                                                        
│         val_beg_date: 20190101                                                                                          
│         val_end_date: 20191231                                                                                          
│         test_beg_date: 20200101                                                                                         
│         test_end_date: 20201231                                                                                         
│       train_frac: 0.8                                                                                                   
│       val_frac: 0.1                                                                                                     
│       test_frac: 0.1                                                                                                    
│       alpha_price: 100                                                                                                  
│       alpha_vol: 0.5                                                                                                    
│       sequence_length: 246                                                                                              
│       stocks_sample: 500                                                                                                
│       nr_loans_to_sample: 500                                                                                           
│       load_data: true                                                                                                   
│       save_data: false                                                                                                  
│       data_path: /home/epsteine/safari-dev/data/equities/equity_dataset_2019.npz                                        
│       steps_per_epoch: 10                                                                                               
│       feature_set:                                                                                                      
│       - return                                                                                                          
│       - suv                                                                                                             
│       - beta                                                                                                            
│       - spread                                                                                                          
│       - residual_volatility                                                                                             
│       - return_1week                                                                                                    
│       - ST_Rev                                                                                                          
│       - r2_1                                                                                                            
│       - r12_2                                                                                                           
│       - r12_7                                                                                                           
│       - r36_13                                                                                                          
│       - Rel2High                                                                                                        
│       - volume                                                                                                          
│       - risk_free_rate                                                                                                  
│       - yearly_accounting_vars                                                                                          
│       - trailing_volatility                                                                                             
│                                                                                                                         
├── task
│   └── _name_: base                                                                                                      
│       loss: next_step_sharpe_ratio                                                                                      
│       metrics:                                                                                                          
│       - next_step_sharpe_ratio                                                                                          
│       - next_step_sharpe_ratio_positive                                                                                 
│       - sharpe_ratio_market                                                                                             
│       - compute_portfolio_metrics                                                                                       
│       - next_step_sharpe_ratio_with_transaction_cost_positive                                                           
│       torchmetrics: null                                                                                                
│                                                                                                                         
├── optimizer
│   └── _name_: adamw                                                                                                     
│       lr: 0.003                                                                                                         
│       weight_decay: 0.05                                                                                                
│       betas:                                                                                                            
│       - 0.9                                                                                                             
│       - 0.999                                                                                                           
│                                                                                                                         
├── scheduler
│   └── _name_: cosine_warmup                                                                                             
│       num_warmup_steps: 30.0                                                                                            
│       num_training_steps: 300.0                                                                                         
│                                                                                                                         
├── callbacks
│   └── learning_rate_monitor:                                                                                            
│         logging_interval: step                                                                                          
│       timer:                                                                                                            
│         step: true                                                                                                      
│         inter_step: false                                                                                               
│         epoch: true                                                                                                     
│         val: true                                                                                                       
│       params:                                                                                                           
│         total: true                                                                                                     
│         trainable: true                                                                                                 
│         fixed: true                                                                                                     
│       model_checkpoint:                                                                                                 
│         monitor: val/next_step_sharpe_ratio_positive                                                                    
│         mode: max                                                                                                       
│         save_top_k: 1                                                                                                   
│         save_last: true                                                                                                 
│         dirpath: checkpoints/                                                                                           
│         filename: step_{step}                                                                                           
│         auto_insert_metric_name: false                                                                                  
│         verbose: true                                                                                                   
│                                                                                                                         
├── decoder
│   └── _name_: tranistion_step_decoder                                                                                   
│       hidden_dim: 64                                                                                                    
│       num_states: 1                                                                                                     
│       lookback_horizon: 25                                                                                              
│       forecast: false                                                                                                   
│       forecast_horizon: 5                                                                                               
│       loan_pool_size: 1                                                                                                 
│       scale_output: true                                                                                                
│       output_scale: 600                                                                                                 
│       l1_normalize: true                                                                                                
│                                                                                                                         
├── model
│   └── layer:                                                                                                            
│         _name_: mha                                                                                                     
│         channels: 1                                                                                                     
│         causal: true                                                                                                    
│         lam: 0.001                                                                                                      
│         kernel_dropout: 0.1                                                                                             
│         bidirectional: false                                                                                            
│         activation: gelu                                                                                                
│         postact: glu                                                                                                    
│         initializer: null                                                                                               
│         weight_norm: false                                                                                              
│         tie_dropout: true                                                                                               
│         l_max: 246                                                                                                      
│         verbose: true                                                                                                   
│         learning_rate: 0.001                                                                                            
│         weight_init: double_exp                                                                                         
│         use_set_mixing: false                                                                                           
│         set_mixing_architecture: MLP                                                                                    
│         nr_layers_with_set: 1                                                                                           
│         set_mixing_dropout: 0.1                                                                                         
│         set_feature_embedding_dim: 5                                                                                    
│         set_debug: true                                                                                                 
│         set_nr_attn_heads: 1                                                                                            
│         kernel_len: 30                                                                                                  
│         n_heads: 8                                                                                                      
│         dropout: null                                                                                                   
│         bias: true                                                                                                      
│         add_bias_kv: false                                                                                              
│         add_zero_attn: false                                                                                            
│         kdim: null                                                                                                      
│         vdim: null                                                                                                      
│       _name_: model                                                                                                     
│       prenorm: false                                                                                                    
│       transposed: false                                                                                                 
│       n_layers: 6                                                                                                       
│       d_model: 64                                                                                                       
│       residual: R                                                                                                       
│       pool:                                                                                                             
│         _name_: pool                                                                                                    
│         stride: 1                                                                                                       
│         expand: null                                                                                                    
│       norm: layer                                                                                                       
│       dropout: 0.1                                                                                                      
│       tie_dropout: true                                                                                                 
│       track_norms: true                                                                                                 
│       encoder: null                                                                                                     
│       decoder: null                                                                                                     
│                                                                                                                         
└── encoder
    └── _name_: set_encoder                                                                                               
        chunk_size: 3                                                                                                     
        architecture: MLP                                                                                                 
        nr_attention_heads: 1                                                                                             
        feature_embedding_dim: 5                                                                                          
        n_attn_summary_statistics: true                                                                                   
        dropout: 0.1                                                                                                      
        debug: true                                                                                                       
                                                                                                                          
