{
  "$defs": {
    "SparseProbingEvalConfig": {
      "properties": {
        "random_seed": {
          "default": 42,
          "description": "Random seed",
          "title": "Random Seed",
          "type": "integer"
        },
        "dataset_names": {
          "description": "List of dataset names. We have at most 5 class names in a single subset, which is why we have multiple bias_in_bios class subsets.",
          "items": {
            "type": "string"
          },
          "title": "Dataset Names",
          "type": "array"
        },
        "probe_train_set_size": {
          "default": 4000,
          "description": "Probe train set size",
          "title": "Probe Train Set Size",
          "type": "integer"
        },
        "probe_test_set_size": {
          "default": 1000,
          "description": "Probe test set size",
          "title": "Probe Test Set Size",
          "type": "integer"
        },
        "context_length": {
          "default": 128,
          "description": "The maximum length of each input to the LLM. Any longer inputs will be truncated, keeping only the beginning.",
          "title": "LLM Context Length",
          "type": "integer"
        },
        "sae_batch_size": {
          "default": 125,
          "description": "SAE batch size, inference only",
          "title": "SAE Batch Size",
          "type": "integer"
        },
        "llm_batch_size": {
          "anyOf": [
            {
              "type": "integer"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "LLM batch size. This is set by default in the main script, or it can be set with a command line argument.",
          "title": "LLM Batch Size"
        },
        "llm_dtype": {
          "default": "",
          "description": "LLM data type. This is set by default in the main script, or it can be set with a command line argument.",
          "title": "LLM Data Type",
          "type": "string"
        },
        "model_name": {
          "default": "",
          "description": "Model name. Must be set with a command line argument.",
          "title": "Model Name",
          "type": "string"
        },
        "k_values": {
          "description": "K represents the number of SAE features or residual stream channels we train the linear probe on. We iterate over all values of K.",
          "items": {
            "type": "integer"
          },
          "title": "K Values",
          "type": "array"
        },
        "lower_vram_usage": {
          "default": false,
          "description": "Lower GPU memory usage by doing more computation on the CPU. Useful on 1M width SAEs. Will be slower and require more system memory.",
          "title": "Lower Memory Usage",
          "type": "boolean"
        }
      },
      "title": "SparseProbingEvalConfig",
      "type": "object"
    },
    "SparseProbingLlmMetrics": {
      "properties": {
        "llm_test_accuracy": {
          "description": "Linear probe accuracy when training on the full LLM residual stream",
          "title": "LLM Test Accuracy",
          "type": "number",
          "ui_default_display": true
        },
        "llm_top_1_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 1 residual stream channel",
          "title": "LLM Top 1 Test Accuracy",
          "ui_default_display": true
        },
        "llm_top_2_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 2 residual stream channels",
          "title": "LLM Top 2 Test Accuracy",
          "ui_default_display": true
        },
        "llm_top_5_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 5 residual stream channels",
          "title": "LLM Top 5 Test Accuracy",
          "ui_default_display": true
        },
        "llm_top_10_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 10 residual stream channels",
          "title": "LLM Top 10 Test Accuracy"
        },
        "llm_top_20_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 20 residual stream channels",
          "title": "LLM Top 20 Test Accuracy"
        },
        "llm_top_50_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 50 residual stream channels",
          "title": "LLM Top 50 Test Accuracy"
        },
        "llm_top_100_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 100 residual stream channels",
          "title": "LLM Top 100 Test Accuracy"
        }
      },
      "required": [
        "llm_test_accuracy"
      ],
      "title": "SparseProbingLlmMetrics",
      "type": "object"
    },
    "SparseProbingMetricCategories": {
      "properties": {
        "llm": {
          "$ref": "#/$defs/SparseProbingLlmMetrics",
          "description": "LLM metrics",
          "title": "LLM",
          "ui_default_display": true
        },
        "sae": {
          "$ref": "#/$defs/SparseProbingSaeMetrics",
          "description": "SAE metrics",
          "title": "SAE",
          "ui_default_display": true
        }
      },
      "required": [
        "llm",
        "sae"
      ],
      "title": "SparseProbingMetricCategories",
      "type": "object"
    },
    "SparseProbingResultDetail": {
      "properties": {
        "dataset_name": {
          "description": "Dataset name",
          "title": "Dataset Name",
          "type": "string"
        },
        "llm_test_accuracy": {
          "description": "Linear probe accuracy when trained on all LLM residual stream channels",
          "title": "LLM Test Accuracy",
          "type": "number"
        },
        "llm_top_1_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 1 residual stream channel",
          "title": "LLM Top 1 Test Accuracy"
        },
        "llm_top_2_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 2 residual stream channels",
          "title": "LLM Top 2 Test Accuracy"
        },
        "llm_top_5_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 5 residual stream channels",
          "title": "LLM Top 5 Test Accuracy"
        },
        "llm_top_10_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 10 residual stream channels",
          "title": "LLM Top 10 Test Accuracy"
        },
        "llm_top_20_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 20 residual stream channels",
          "title": "LLM Top 20 Test Accuracy"
        },
        "llm_top_50_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 50 residual stream channels",
          "title": "LLM Top 50 Test Accuracy"
        },
        "llm_top_100_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the LLM top 100 residual stream channels",
          "title": "LLM Top 100 Test Accuracy"
        },
        "sae_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on all SAE latents",
          "title": "SAE Test Accuracy"
        },
        "sae_top_1_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 1 SAE latent",
          "title": "SAE Top 1 Test Accuracy"
        },
        "sae_top_2_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 2 SAE latents",
          "title": "SAE Top 2 Test Accuracy"
        },
        "sae_top_5_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 5 SAE latents",
          "title": "SAE Top 5 Test Accuracy"
        },
        "sae_top_10_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 10 SAE latents",
          "title": "SAE Top 10 Test Accuracy"
        },
        "sae_top_20_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 20 SAE latents",
          "title": "SAE Top 20 Test Accuracy"
        },
        "sae_top_50_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 50 SAE latents",
          "title": "SAE Top 50 Test Accuracy"
        },
        "sae_top_100_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 100 SAE latents",
          "title": "SAE Top 100 Test Accuracy"
        }
      },
      "required": [
        "dataset_name",
        "llm_test_accuracy"
      ],
      "title": "SparseProbingResultDetail",
      "type": "object"
    },
    "SparseProbingSaeMetrics": {
      "properties": {
        "sae_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on all SAE latents",
          "title": "SAE Test Accuracy",
          "ui_default_display": true
        },
        "sae_top_1_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 1 SAE latent",
          "title": "SAE Top 1 Test Accuracy",
          "ui_default_display": true
        },
        "sae_top_2_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 2 SAE latents",
          "title": "SAE Top 2 Test Accuracy",
          "ui_default_display": true
        },
        "sae_top_5_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 5 SAE latents",
          "title": "SAE Top 5 Test Accuracy",
          "ui_default_display": true
        },
        "sae_top_10_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 10 SAE latents",
          "title": "SAE Top 10 Test Accuracy"
        },
        "sae_top_20_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 20 SAE latents",
          "title": "SAE Top 20 Test Accuracy"
        },
        "sae_top_50_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 50 SAE latents",
          "title": "SAE Top 50 Test Accuracy"
        },
        "sae_top_100_test_accuracy": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Linear probe accuracy when trained on the top 100 SAE latents",
          "title": "SAE Top 100 Test Accuracy"
        }
      },
      "title": "SparseProbingSaeMetrics",
      "type": "object"
    }
  },
  "description": "An evaluation using SAEs to probe for supervised concepts in LLMs. We use sparse probing with the top K SAE latents and probe for over 30 different classes across 5 datasets.",
  "properties": {
    "eval_type_id": {
      "default": "sparse_probing",
      "title": "Eval Type Id",
      "type": "string"
    },
    "eval_config": {
      "$ref": "#/$defs/SparseProbingEvalConfig",
      "description": "The configuration of the evaluation.",
      "title": "Eval Config Type"
    },
    "eval_id": {
      "description": "A unique UUID identifying this specific eval run",
      "title": "ID",
      "type": "string"
    },
    "datetime_epoch_millis": {
      "description": "The datetime of the evaluation in epoch milliseconds",
      "title": "DateTime (epoch ms)",
      "type": "integer"
    },
    "eval_result_metrics": {
      "$ref": "#/$defs/SparseProbingMetricCategories",
      "description": "The metrics of the evaluation, organized by category. Define your own categories and the metrics that go inside them.",
      "title": "Result Metrics Categorized"
    },
    "eval_result_details": {
      "description": "Each object is a stat on the sparse probing results for a dataset.",
      "items": {
        "$ref": "#/$defs/SparseProbingResultDetail"
      },
      "title": "Per-Dataset Sparse Probing Results",
      "type": "array"
    },
    "sae_bench_commit_hash": {
      "description": "The commit hash of the SAE Bench that ran the evaluation.",
      "title": "SAE Bench Commit Hash",
      "type": "string"
    },
    "sae_lens_id": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The ID of the SAE in SAE Lens.",
      "title": "SAE Lens ID"
    },
    "sae_lens_release_id": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The release ID of the SAE in SAE Lens.",
      "title": "SAE Lens Release ID"
    },
    "sae_lens_version": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The version of SAE Lens that ran the evaluation.",
      "title": "SAE Lens Version"
    },
    "sae_cfg_dict": {
      "anyOf": [
        {
          "type": "object"
        },
        {
          "type": "null"
        }
      ],
      "description": "The configuration of the SAE (custom or from SAE Lens) that ran the evaluation. This should match the SAE Lens config schema.",
      "title": "SAE Config Dict"
    },
    "eval_result_unstructured": {
      "anyOf": [
        {},
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Optional. Any additional outputs that don't fit into the structured eval_result_metrics or eval_result_details fields. Since these are unstructured, don't expect this to be easily renderable in UIs, or contain any titles or descriptions.",
      "title": "Unstructured Results"
    }
  },
  "required": [
    "eval_config",
    "eval_id",
    "datetime_epoch_millis",
    "eval_result_metrics",
    "sae_bench_commit_hash",
    "sae_lens_id",
    "sae_lens_release_id",
    "sae_lens_version",
    "sae_cfg_dict"
  ],
  "title": "Sparse Probing",
  "type": "object"
}