# NOTE(review): presumably a Hydra defaults list — it pulls in `base_model`
# and selects the `metric` and `loss` options named below. Entry order may be
# significant to the loader, so keep it as written — TODO confirm.
defaults:
  - base_model
  - metric: q4l_default_metric
  - loss: mse

# Identification of this model config.
# NOTE(review): `rl` presumably means reinforcement learning (the components
# further down are loaded from `q4l.model.rl.*`) — confirm.
model_type: rl
# NOTE(review): presumably the model emits portfolio positions — verify
# against whatever consumes `output_type`.
output_type: position
name: BaseRLPortfolioModel
# General model/run parameters.
# NOTE(review): the meaning and units of these values are not visible in this
# file; the notes below are assumptions to be confirmed against the consumer.
basic_info:
  output_dim: 1
  batch_size: 300
  # presumably an upper bound on steps (per episode or per epoch?) — confirm
  max_step: 1000
  # presumably the name of the validation metric that is tracked — confirm
  monitor: valid_ret
# Trainer settings. Only `max_epochs` is set in this file.
# NOTE(review): other trainer options may come from `base_model` — confirm.
trainer:
  max_epochs: 100
# Pluggable components of the model. Every entry below has the same shape:
# `name` and `module_path` say what to load, `kwargs` carries its arguments.
# NOTE(review): presumably `name` is imported from `module_path` and built
# with `kwargs` — confirm against the component loader.
components:
  agent:
    name: DDPGAgent
    module_path: q4l.model.rl.agent
    kwargs:
      # The actor and the critic each get their own optimizer spec; both are
      # currently configured identically.
      actor_optimizer:
        name: Adam
        module_path: torch.optim
        kwargs:
          # Written with a decimal point in the mantissa (like `account`
          # further down) so that YAML 1.1 loaders also resolve it as a
          # float; a bare `1e-3` is read as a string by such loaders.
          lr: 1.0e-3
      critic_optimizer:
        name: Adam
        module_path: torch.optim
        kwargs:
          # Same portable float spelling as the actor's learning rate.
          lr: 1.0e-3
      # NOTE(review): presumably the reward discount factor — confirm.
      gamma: 0.99

  env:
    name: Q4LPortfolioEnv  # Placeholder
    module_path: q4l.model.rl.env
    kwargs:
      executor_config:
        name: SimulatorExecutor
        module_path: q4l.qlib.backtest.executor
        kwargs:
          time_per_step: day
          generate_portfolio_metrics: true
      # Nested interpolation: the inner `${experiment.data.region}` supplies
      # the last path segment of the outer reference, so the exchange
      # settings are looked up per region.
      exchange_config: ${experiment.collector.zoo.backtest_zoo.exchange.${experiment.data.region}}
      # NOTE(review): presumably the initial account cash — confirm units.
      account: 1.0e+8
      benchmark: ${experiment.data.benchmark}

  replay_buffer:
    name: ReplayBuffer
    module_path: q4l.model.rl.replay_buffer
    kwargs:
      # NOTE(review): presumably the buffer capacity in stored transitions —
      # confirm against ReplayBuffer.
      max_size: 1500
