# path to the task data directory; all paths will be overridden
data_dir: ../../../../data/

# task name (required); should be consistent with desc_file and data_dir
name: titanic
# either provide a path to a plaintext file describing the task
desc_file: ../../../../data/titanic/data/public/description.txt
# or provide the task goal (and optionally evaluation information) as arguments
goal: null
eval: null

log_dir: ../../../../outputs/titanic/logs
workspace_dir: ../../../../outputs/

# whether to unzip any archives in the data directory
preprocess_data: true
# whether to copy the data to the workspace directory (otherwise it will be symlinked)
# copying is recommended to prevent the agent from accidentally modifying the original data
copy_data: true

exp_name: null  # a random experiment name will be generated if not provided

# settings for code execution
exec:
  # NOTE(review): unit is presumably seconds (43200 s = 12 h) — confirm against the consumer
  timeout: 43200
  # agent_file_name: runfile.py
  # format_tb_ipython: False


# agent hyperparams
agent:
  # how many improvement iterations to run
  steps: 15
  # whether to instruct the agent to use CV (set to 1 to disable)
  k_fold_validation: 1
  # whether to instruct the agent to generate a prediction function
  expose_prediction: false
  # whether to provide the agent with a preview of the data
  data_preview: true

  # LLM settings for coding
  code:
    model_mode: gpt
    model_name: gpt-4o-mini
    port: 8314  # port for local model
    max_completion_tokens: 8192  # maximum tokens for completion
    max_prompt_tokens: 30000
    # NOTE(review): -1 looks like a "no specific index" sentinel — confirm against the consumer
    api_idx: -1
    api_key: null  # use environment variable OPENAI_API_KEY
    temperature: 0.0
    top_p: 1.0

  # hyperparameters for the tree search
  search:
    max_debug_depth: 3
    debug_prob: 0.5
    num_drafts: 3