{
  "$defs": {
    "BaseResultDetail": {
      "properties": {},
      "title": "BaseResultDetail",
      "type": "object"
    },
    "UnlearningEvalConfig": {
      "properties": {
        "random_seed": {
          "default": 42,
          "description": "Random seed",
          "title": "Random Seed",
          "type": "integer"
        },
        "dataset_names": {
          "description": "List of dataset names. We want to unlearn wmdp-bio while retaining knowledge in other datasets",
          "items": {
            "type": "string"
          },
          "title": "Dataset Names",
          "type": "array"
        },
        "intervention_method": {
          "default": "clamp_feature_activation",
          "description": "Intervention method. We only support 'clamp_feature_activation' for now",
          "title": "Intervention Method",
          "type": "string"
        },
        "retain_thresholds": {
          "description": "A list of thresholds. We ignore features that activate more than a given threshold on the retain dataset",
          "items": {
            "type": "number"
          },
          "title": "Retain Thresholds",
          "type": "array"
        },
        "n_features_list": {
          "description": "Each N is the number of features we select and clamp to a negative value",
          "items": {
            "type": "integer"
          },
          "title": "N Features List",
          "type": "array"
        },
        "multipliers": {
          "description": "A list of negative values. We iterate over this list, clamping the selected features to each value",
          "items": {
            "type": "integer"
          },
          "title": "Multipliers",
          "type": "array"
        },
        "dataset_size": {
          "default": 1024,
          "description": "Dataset size we use when calculating feature sparsity",
          "title": "Dataset Size",
          "type": "integer"
        },
        "seq_len": {
          "default": 1024,
          "description": "Sequence length when calculating feature sparsity",
          "title": "Sequence Length",
          "type": "integer"
        },
        "n_batch_loss_added": {
          "default": 50,
          "description": "Number of batches to use when calculating the loss added by an intervention (currently not supported).",
          "title": "N Batch Loss Added",
          "type": "integer"
        },
        "target_metric": {
          "default": "correct",
          "description": "Controls the type of `question_ids` we load. We support `correct`, `correct-iff-question`, and `correct-no-tricks`.",
          "title": "Target Metric",
          "type": "string"
        },
        "save_metrics": {
          "default": true,
          "description": "If true, we save the metrics for each set of intervention hyperparameters. This is required to be true currently, as the unlearning score is calculated over all results.",
          "title": "Save Metrics Flag",
          "type": "boolean"
        },
        "model_name": {
          "default": "",
          "description": "Model name. Must be set with a command line argument. We recommend instruct tuned models >= 2B parameters.",
          "title": "Model Name",
          "type": "string"
        },
        "llm_batch_size": {
          "default": null,
          "description": "LLM batch size. This is set by default in the main script, or it can be set with a command line argument.",
          "title": "LLM Batch Size",
          "type": "integer"
        },
        "llm_dtype": {
          "default": "",
          "description": "LLM data type. This is set by default in the main script, or it can be set with a command line argument.",
          "title": "LLM Data Type",
          "type": "string"
        }
      },
      "title": "UnlearningEvalConfig",
      "type": "object"
    },
    "UnlearningMetricCategories": {
      "properties": {
        "unlearning": {
          "$ref": "#/$defs/UnlearningMetrics",
          "description": "Metrics related to unlearning",
          "title": "Unlearning"
        }
      },
      "required": [
        "unlearning"
      ],
      "title": "UnlearningMetricCategories",
      "type": "object"
    },
    "UnlearningMetrics": {
      "properties": {
        "unlearning_score": {
          "description": "Unlearning score, using methodology from `Applying Sparse Autoencoders to Unlearn Knowledge in Language Models`",
          "title": "Unlearning Score",
          "type": "number",
          "ui_default_display": true
        }
      },
      "required": [
        "unlearning_score"
      ],
      "title": "UnlearningMetrics",
      "type": "object"
    }
  },
  "description": "An evaluation of the ability of SAEs to unlearn biology knowledge from LLMs, using methodology from `Applying Sparse Autoencoders to Unlearn Knowledge in Language Models`",
  "properties": {
    "eval_type_id": {
      "default": "unlearning",
      "description": "The type of the evaluation",
      "title": "Eval Type ID",
      "type": "string"
    },
    "eval_config": {
      "$ref": "#/$defs/UnlearningEvalConfig",
      "description": "The configuration of the evaluation.",
      "title": "Eval Config Type"
    },
    "eval_id": {
      "description": "A unique UUID identifying this specific eval run",
      "title": "ID",
      "type": "string"
    },
    "datetime_epoch_millis": {
      "description": "The datetime of the evaluation in epoch milliseconds",
      "title": "DateTime (epoch ms)",
      "type": "integer"
    },
    "eval_result_metrics": {
      "$ref": "#/$defs/UnlearningMetricCategories",
      "description": "The metrics of the evaluation, organized by category. Define your own categories and the metrics that go inside them.",
      "title": "Result Metrics Categorized"
    },
    "eval_result_details": {
      "default": null,
      "description": "Optional. The details of the evaluation. A list of objects that stores nested or more detailed data, such as details about the absorption of each letter.",
      "items": {
        "$ref": "#/$defs/BaseResultDetail"
      },
      "title": "Result Details",
      "type": "array"
    },
    "sae_bench_commit_hash": {
      "description": "The commit hash of the SAE Bench that ran the evaluation.",
      "title": "SAE Bench Commit Hash",
      "type": "string"
    },
    "sae_lens_id": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The ID of the SAE in SAE Lens.",
      "title": "SAE Lens ID"
    },
    "sae_lens_release_id": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The release ID of the SAE in SAE Lens.",
      "title": "SAE Lens Release ID"
    },
    "sae_lens_version": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "description": "The version of SAE Lens that ran the evaluation.",
      "title": "SAE Lens Version"
    },
    "sae_cfg_dict": {
      "anyOf": [
        {
          "type": "object"
        },
        {
          "type": "null"
        }
      ],
      "description": "The configuration of the SAE (custom or from SAE Lens) that ran the evaluation. This should match the SAE Lens config schema.",
      "title": "SAE Config Dict"
    },
    "eval_result_unstructured": {
      "anyOf": [
        {},
        {
          "type": "null"
        }
      ],
      "default": null,
      "description": "Optional. Any additional outputs that don't fit into the structured eval_result_metrics or eval_result_details fields. Since these are unstructured, don't expect this to be easily renderable in UIs, or contain any titles or descriptions.",
      "title": "Unstructured Results"
    }
  },
  "required": [
    "eval_config",
    "eval_id",
    "datetime_epoch_millis",
    "eval_result_metrics",
    "sae_bench_commit_hash",
    "sae_lens_id",
    "sae_lens_release_id",
    "sae_lens_version",
    "sae_cfg_dict"
  ],
  "title": "Unlearning",
  "type": "object"
}