# General run settings.
log_level: INFO
# Written without a digit separator: `1_000` resolves to an integer only under
# YAML 1.1 rules (e.g. PyYAML); YAML 1.2 core-schema parsers read it as the
# string "1_000". `1000` is an integer under both.
n_bootstrap: 1000
plot_format: png

# Weighting schemes. Each entry pairs a weight column name with the text
# snippet associated with it (presumably used in plot text -- confirm against
# the consumer). The last entry has a null weight column.
weighting:
  - weight_col: equal_task_weight
    graph_snippet: 'Equally weighted tasks'
  - weight_col: invsqrt_task_weight
    graph_snippet: 'Tasks diversity-weighted'
  - weight_col: null
    # Quoted so the snippet is unambiguously the string "None".
    graph_snippet: 'None'

# Agents listed for the agent summary.
# NOTE(review): "o4-mini" has an entry under plots.agent_styling and in
# plots.legend_order but is not listed here -- confirm this is intentional.
agent_summary:
  agents:
    - Claude 3 Opus
    - Claude 3.5 Sonnet (Old)
    - Claude 3.5 Sonnet (New)
    - Claude 3.7 Sonnet
    - o1-preview
    - o1
    - o3
    - GPT-4o
    - GPT-4 Turbo
    - GPT-4 0314
    - GPT-4 0125
    - GPT-4 1106
    - davinci-002 (GPT-3)
    - gpt-3.5-turbo-instruct
    - GPT-2

# Plot configuration: shared font sizes and label padding first, followed by
# per-plot styling sections.
plots:
  # Font sizes and axis-label padding shared across plots
  # (units are not stated here; presumably points -- confirm).
  suptitle_fontsize: 14
  xlabelpad: 10
  ylabelpad: 10
  ax_label_fontsize: 14
  title_fontsize: 16
  annotation_fontsize: 14
  xtick_labelsize: 14
  ytick_labelsize: 14
  legend_fontsize: 14

  # Styling for the task-distribution plot. Keys look like matplotlib keyword
  # arguments -- confirm against the plotting code.
  task_distribution_styling:
    # Histogram bar appearance.
    hist:
      edgecolor: '#a6a6a6'
      color: '#d4d4d4'
      alpha: 1
      linewidth: 1
      zorder: 50
    # Grid-line appearance, anchored as `grid_styling` so that other sections
    # can alias the same settings.
    grid: &grid_styling
      which: 'major'
      linestyle: '-'
      alpha: 0.2
      color: 'grey'

  # Styling for scatter plots. Keys look like matplotlib keyword arguments --
  # confirm against the plotting code.
  scatter_styling:
    # Error-bar appearance; zorder 9 is one below the scatter points' zorder.
    error_bar:
      color: 'grey'
      fmt: 'none'
      capsize: 2
      alpha: 1
      zorder: 9
      linewidth: 1.5
      capthick: 1.5
    # Same grid settings as the mapping anchored `grid_styling`.
    grid: *grid_styling
    # Scatter point appearance.
    scatter:
      s: 150
      edgecolor: 'black'
      linewidth: 0.5
      zorder: 10

  # Color assigned to each task source.
  task_source_styling:
    'RE-Bench':
      color: '#e26e2f'
    'HCAST':
      color: '#3483eb'
    'SWAA':
      color: '#3e805f'

  # Per-agent plot styling. Each entry sets:
  #   lab_color    - color shared by every agent that aliases the same lab anchor
  #   marker       - marker symbol (values look like matplotlib marker codes)
  #   unique_color - color specific to the agent
  # NOTE(review): some unique_color values repeat (GPT-4o / GPT-4 0125,
  # GPT-4 1106 / GPT-4 0314, gpt-3.5-turbo-instruct / GPT-2) -- confirm intended.
  agent_styling:
    "Claude 3.7 Sonnet":
      # Anthropic lab color. Must differ from `openai_color`, otherwise the two
      # labs are indistinguishable when coloring by lab_color.
      # NOTE(review): value taken from the palette already used in this file --
      # confirm it is the intended Anthropic color.
      lab_color: &anthropic_color "#e26e2f"
      marker: "D"
      unique_color: "#9C5EDA"
    "Claude 3.5 Sonnet (New)":
      lab_color: *anthropic_color
      marker: "s"
      unique_color: "#8B4DC9"
    "Claude 3.5 Sonnet (Old)":
      lab_color: *anthropic_color
      marker: "^"
      unique_color: "#9B6BE0"
    "Claude 3 Opus":
      lab_color: *anthropic_color
      marker: "o"
      unique_color: "#B594E8"
    "o3":
      # OpenAI lab color, aliased by all remaining model entries below.
      lab_color: &openai_color "#3e805f"
      marker: "*"
      unique_color: "#42AB42"
    "o4-mini":
      lab_color: *openai_color
      marker: "h"
      unique_color: "#329B32"
    "o1":
      lab_color: *openai_color
      marker: "P"
      unique_color: "#228B22"
    "o1-preview":
      lab_color: *openai_color
      marker: "X"
      unique_color: "#3CB371"
    "GPT-4o":
      lab_color: *openai_color
      marker: "d"
      unique_color: "#2B8FB0"
    "GPT-4 Turbo":
      lab_color: *openai_color
      marker: "v"
      unique_color: "#4A9CBD"
    "GPT-4 0125":
      lab_color: *openai_color
      marker: "P"
      unique_color: "#2B8FB0"
    "GPT-4 1106":
      lab_color: *openai_color
      marker: "D"
      unique_color: "#87CEEB"
    "GPT-4 0314":
      lab_color: *openai_color
      marker: "s"
      unique_color: "#87CEEB"
    "gpt-3.5-turbo-instruct":
      lab_color: *openai_color
      marker: "^"
      unique_color: "#CCE6FF"
    "davinci-002 (GPT-3)":
      lab_color: *openai_color
      marker: "o"
      unique_color: "#B3E0FF"
    "GPT-2":
      lab_color: *openai_color
      marker: "*"
      unique_color: "#CCE6FF"
    # Non-model entries.
    "human":
      lab_color: "grey"
      marker: "o"
      unique_color: "#858585"
    "best human for each task":
      lab_color: "gold"
      marker: "o"
      unique_color: "#FFD700"
    # Presumably the fallback for agents without their own entry -- confirm.
    "default":
      lab_color: "black"
      marker: "o"
      unique_color: "black"
  # Line styling per trendline type. Every entry shares the same alpha and
  # linewidth through the anchors defined on the first entry; only the color
  # differs.
  performance_over_time_trendline_styling:
    'linear':
      line:
        color: 'red'
        alpha: &trendline_alpha 0.5
        linewidth: &trendline_linewidth 2
    'exponential':
      line:
        color: 'blue'
        alpha: *trendline_alpha
        linewidth: *trendline_linewidth
    'hyperbolic':
      line:
        color: 'green'
        alpha: *trendline_alpha
        linewidth: *trendline_linewidth
    # Presumably used for trendline types not listed above -- confirm.
    'default':
      line:
        color: 'black'
        alpha: *trendline_alpha
        linewidth: *trendline_linewidth

  # Order of agents in plot legends. The names match keys of agent_styling
  # and are listed in the same order as the model entries there.
  legend_order:
    - Claude 3.7 Sonnet
    - Claude 3.5 Sonnet (New)
    - Claude 3.5 Sonnet (Old)
    - Claude 3 Opus
    - o3
    - o4-mini
    - o1
    - o1-preview
    - GPT-4o
    - GPT-4 Turbo
    - GPT-4 0125
    - GPT-4 1106
    - GPT-4 0314
    - gpt-3.5-turbo-instruct
    - davinci-002 (GPT-3)
    - GPT-2
  # Agents included in the cost plot.
  # NOTE(review): compared with legend_order, "o3", "o4-mini" and "GPT-4 0125"
  # are absent from this list -- confirm this is intentional.
  plot_cost:
    include_agents:
      - Claude 3.7 Sonnet
      - Claude 3.5 Sonnet (New)
      - Claude 3.5 Sonnet (Old)
      - Claude 3 Opus
      - o1
      - o1-preview
      - GPT-4o
      - GPT-4 Turbo
      - GPT-4 1106
      - GPT-4 0314
      - gpt-3.5-turbo-instruct
      - davinci-002 (GPT-3)
      - GPT-2
