CONFIG
├── train
│   └── seed: 1                                                                                                                   
│       interval: step                                                                                                            
│       monitor: val/next_step_sharpe_ratio_positive                                                                              
│       mode: max                                                                                                                 
│       ema: 0.0                                                                                                                  
│       test: false                                                                                                               
│       debug: false                                                                                                              
│       ignore_warnings: false                                                                                                    
│       state:                                                                                                                    
│         mode: null                                                                                                              
│         n_context: 0                                                                                                            
│         n_context_eval: 0                                                                                                       
│       ckpt: null                                                                                                                
│       disable_dataset: false                                                                                                    
│       validate_at_start: false                                                                                                  
│       pretrained_model_path: /home/epsteine/safari-dev/outputs/outputs/2025-05-15/2016_set_MLP_seed_1/last.ckpt                 
│       pretrained_model_strict_load: true                                                                                        
│       pretrained_model_state_hook:                                                                                              
│         _name_: null                                                                                                            
│       post_init_hook:                                                                                                           
│         _name_: null                                                                                                            
│       layer_decay:                                                                                                              
│         _name_: null                                                                                                            
│         decay: 0.7                                                                                                              
│                                                                                                                                 
├── tolerance
│   └── logdir: ./resume                                                                                                          
│       id: null                                                                                                                  
│                                                                                                                                 
├── wandb
│   └── project: equities-set-seq-model                                                                                           
│       group: ''                                                                                                                 
│       job_type: training                                                                                                        
│       mode: online                                                                                                              
│       name: equities_set_seq_seed1                                                                                              
│       save_dir: .                                                                                                               
│       id: equities_set_seq_seed1                                                                                                
│                                                                                                                                 
├── trainer
│   └── _target_: pytorch_lightning.Trainer                                                                                       
│       devices: 1                                                                                                                
│       accelerator: gpu                                                                                                          
│       accumulate_grad_batches: 1                                                                                                
│       max_epochs: 30                                                                                                            
│       gradient_clip_val: 0.0                                                                                                    
│       log_every_n_steps: 10                                                                                                     
│       limit_train_batches: 1.0                                                                                                  
│       limit_val_batches: 1.0                                                                                                    
│                                                                                                                                 
├── loader
│   └── batch_size: 1                                                                                                             
│       num_workers: 0                                                                                                            
│       pin_memory: false                                                                                                         
│       drop_last: true                                                                                                           
│                                                                                                                                 
├── dataset
│   └── _name_: equities_dataset                                                                                                  
│       C: 10                                                                                                                     
│       A: 2                                                                                                                      
│       alpha_p: 0.05                                                                                                             
│       beta_p: 0.6                                                                                                               
│       alpha_sp: 0.15                                                                                                            
│       beta_sp: 0.6                                                                                                              
│       num_samples: 20                                                                                                           
│       kappa_p: 0.8                                                                                                              
│       kappa_sp: 0.6                                                                                                             
│       num_seq: 100                                                                                                              
│       val_split: 0.1                                                                                                            
│       test_split: 0.1                                                                                                           
│       dataset_name: equities_dataset                                                                                            
│       debug: true                                                                                                               
│       debug_data_size: 3                                                                                                        
│       seed: 42                                                                                                                  
│       num_states: 77                                                                                                            
│       dataset_config:                                                                                                           
│         daily_price_data_path: /share/data/llm_mortgages/equities_data/data/bllp/daily_price_data_n_equities_4000.npz           
│         train_beg_date: 20110101                                                                                                
│         train_end_date: 20181231                                                                                                
│         val_beg_date: 20190101                                                                                                  
│         val_end_date: 20191231                                                                                                  
│         test_beg_date: 20200101                                                                                                 
│         test_end_date: 20201231                                                                                                 
│       train_frac: 0.8                                                                                                           
│       val_frac: 0.1                                                                                                             
│       test_frac: 0.1                                                                                                            
│       alpha_price: 100                                                                                                          
│       alpha_vol: 0.5                                                                                                            
│       sequence_length: 246                                                                                                      
│       stocks_sample: 500                                                                                                        
│       nr_loans_to_sample: 500                                                                                                   
│       load_data: true                                                                                                           
│       save_data: false                                                                                                          
│       data_path: /home/epsteine/safari-dev//data/equities/equity_dataset_2019.npz                                               
│       steps_per_epoch: 50                                                                                                       
│       feature_set:                                                                                                              
│       - return                                                                                                                  
│       - suv                                                                                                                     
│       - beta                                                                                                                    
│       - spread                                                                                                                  
│       - residual_volatility                                                                                                     
│       - return_1week                                                                                                            
│       - ST_Rev                                                                                                                  
│       - r2_1                                                                                                                    
│       - r12_2                                                                                                                   
│       - r12_7                                                                                                                   
│       - r36_13                                                                                                                  
│       - Rel2High                                                                                                                
│       - volume                                                                                                                  
│       - risk_free_rate                                                                                                          
│       - yearly_accounting_vars                                                                                                  
│       - trailing_volatility                                                                                                     
│                                                                                                                                 
├── task
│   └── _name_: base                                                                                                              
│       loss: next_step_sharpe_ratio                                                                                              
│       metrics:                                                                                                                  
│       - next_step_sharpe_ratio                                                                                                  
│       - next_step_sharpe_ratio_positive                                                                                         
│       - sharpe_ratio_market                                                                                                     
│       - compute_portfolio_metrics                                                                                               
│       - next_step_sharpe_ratio_with_transaction_cost_positive                                                                   
│       torchmetrics: null                                                                                                        
│                                                                                                                                 
├── optimizer
│   └── _name_: adamw                                                                                                             
│       lr: 0.003                                                                                                                 
│       weight_decay: 0.05                                                                                                        
│       betas:                                                                                                                    
│       - 0.9                                                                                                                     
│       - 0.999                                                                                                                   
│                                                                                                                                 
├── scheduler
│   └── _name_: cosine_warmup                                                                                                     
│       num_warmup_steps: 150.0                                                                                                   
│       num_training_steps: 1500.0                                                                                                
│                                                                                                                                 
├── callbacks
│   └── learning_rate_monitor:                                                                                                    
│         logging_interval: step                                                                                                  
│       timer:                                                                                                                    
│         step: true                                                                                                              
│         inter_step: false                                                                                                       
│         epoch: true                                                                                                             
│         val: true                                                                                                               
│       params:                                                                                                                   
│         total: true                                                                                                             
│         trainable: true                                                                                                         
│         fixed: true                                                                                                             
│       model_checkpoint:                                                                                                         
│         monitor: val/next_step_sharpe_ratio_positive                                                                            
│         mode: max                                                                                                               
│         save_top_k: 1                                                                                                           
│         save_last: true                                                                                                         
│         dirpath: checkpoints/                                                                                                   
│         filename: step_{step}                                                                                                   
│         auto_insert_metric_name: false                                                                                          
│         verbose: true                                                                                                           
│                                                                                                                                 
├── decoder
│   └── _name_: tranistion_step_decoder                                                                                           
│       hidden_dim: 64                                                                                                            
│       num_states: 1                                                                                                             
│       lookback_horizon: 25                                                                                                      
│       forecast: false                                                                                                           
│       forecast_horizon: 5                                                                                                       
│       loan_pool_size: 1                                                                                                         
│       scale_output: true                                                                                                        
│       output_scale: 600                                                                                                         
│       l1_normalize: true                                                                                                        
│                                                                                                                                 
├── model
│   └── layer:                                                                                                                    
│         _name_: long-conv                                                                                                       
│         channels: 1                                                                                                             
│         causal: true                                                                                                            
│         lam: 0.001                                                                                                              
│         kernel_dropout: 0.1                                                                                                     
│         bidirectional: false                                                                                                    
│         activation: gelu                                                                                                        
│         postact: glu                                                                                                            
│         initializer: null                                                                                                       
│         weight_norm: false                                                                                                      
│         tie_dropout: true                                                                                                       
│         l_max: 246                                                                                                              
│         verbose: true                                                                                                           
│         learning_rate: 0.001                                                                                                    
│         weight_init: random                                                                                                     
│         use_set_mixing: true                                                                                                    
│         set_mixing_architecture: MLP                                                                                            
│         nr_layers_with_set: 6                                                                                                   
│         set_mixing_dropout: 0.1                                                                                                 
│         set_feature_embedding_dim: 5                                                                                            
│         set_debug: false                                                                                                        
│         set_nr_attn_heads: 1                                                                                                    
│         kernel_len: 30                                                                                                          
│       _name_: model                                                                                                             
│       prenorm: false                                                                                                            
│       transposed: false                                                                                                         
│       n_layers: 6                                                                                                               
│       d_model: 64                                                                                                               
│       residual: R                                                                                                               
│       pool:                                                                                                                     
│         _name_: pool                                                                                                            
│         stride: 1                                                                                                               
│         expand: null                                                                                                            
│       norm: layer                                                                                                               
│       dropout: 0.1                                                                                                              
│       tie_dropout: true                                                                                                         
│       track_norms: true                                                                                                         
│       encoder: null                                                                                                             
│       decoder: null                                                                                                             
│                                                                                                                                 
└── encoder
    └── _name_: set_encoder                                                                                                       
        chunk_size: 3                                                                                                             
        architecture: MLP                                                                                                         
        nr_attention_heads: 1                                                                                                     
        feature_embedding_dim: 5                                                                                                  
        n_attn_summary_statistics: true                                                                                           
        dropout: 0.1                                                                                                              
        debug: false                                                                                                              
                                                                                                                                  
