/* eslint-disable */
/**
 * This file was automatically generated by json-schema-to-typescript.
 * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
 * and run json-schema-to-typescript to regenerate this file.
 */

/**
 * Type of the optional `eval_type_id` field on `SparseProbing`.
 * NOTE(review): the schema gives no description; presumably this names the kind of evaluation that produced the result — confirm.
 */
export type EvalTypeId = string;
/**
 * Random seed for the evaluation (`random_seed` in the eval config).
 */
export type RandomSeed = number;
/**
 * List of dataset names. A single subset holds at most 5 class names, which is why there are multiple bias_in_bios class subsets.
 */
export type DatasetNames = string[];
/**
 * Size of the probe training set.
 */
export type ProbeTrainSetSize = number;
/**
 * Size of the probe test set.
 */
export type ProbeTestSetSize = number;
/**
 * The maximum length of each input to the LLM. Longer inputs are truncated, keeping only the beginning.
 */
export type LLMContextLength = number;
/**
 * SAE batch size. Applies to inference only.
 */
export type SAEBatchSize = number;
/**
 * LLM batch size. Set by default in the main script, or overridden with a command line argument. The type also permits `null`.
 */
export type LLMBatchSize = number | null;
/**
 * LLM data type. Set by default in the main script, or overridden with a command line argument.
 */
export type LLMDataType = string;
/**
 * Model name. Must be set with a command line argument.
 */
export type ModelName = string;
/**
 * K is the number of SAE features or residual stream channels the linear probe is trained on. The evaluation iterates over all values of K.
 */
export type KValues = number[];
/**
 * Lower GPU memory usage by doing more computation on the CPU. Useful on 1M width SAEs. Slower, and requires more system memory.
 */
export type LowerMemoryUsage = boolean;
/**
 * A unique UUID identifying this specific eval run (type of the `eval_id` field).
 */
export type ID = string;
/**
 * The datetime of the evaluation, expressed in epoch milliseconds (type of the `datetime_epoch_millis` field).
 */
export type DateTimeEpochMs = number;
/**
 * Linear probe test accuracy when trained on the full LLM residual stream. Always a number (never `null`).
 */
export type LLMTestAccuracy = number;
/**
 * Linear probe test accuracy when trained on the top 1 LLM residual stream channel. May be `null`.
 */
export type LLMTop1TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 2 LLM residual stream channels. May be `null`.
 */
export type LLMTop2TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 5 LLM residual stream channels. May be `null`.
 */
export type LLMTop5TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 10 LLM residual stream channels. May be `null`.
 */
export type LLMTop10TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 20 LLM residual stream channels. May be `null`.
 */
export type LLMTop20TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 50 LLM residual stream channels. May be `null`.
 */
export type LLMTop50TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 100 LLM residual stream channels. May be `null`.
 */
export type LLMTop100TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on all SAE latents. May be `null`.
 */
export type SAETestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 1 SAE latent. May be `null`.
 */
export type SAETop1TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 2 SAE latents. May be `null`.
 */
export type SAETop2TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 5 SAE latents. May be `null`.
 */
export type SAETop5TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 10 SAE latents. May be `null`.
 */
export type SAETop10TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 20 SAE latents. May be `null`.
 */
export type SAETop20TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 50 SAE latents. May be `null`.
 */
export type SAETop50TestAccuracy = number | null;
/**
 * Linear probe test accuracy when trained on the top 100 SAE latents. May be `null`.
 */
export type SAETop100TestAccuracy = number | null;
/**
 * Name of the dataset that a per-dataset result entry (`SparseProbingResultDetail`) refers to.
 */
export type DatasetName = string;
/**
 * Per-dataset linear probe test accuracy when trained on all LLM residual stream channels.
 * The `1` suffix keeps this alias distinct from `LLMTestAccuracy`; the same applies to the other `...1` aliases used by `SparseProbingResultDetail`.
 */
export type LLMTestAccuracy1 = number;
/**
 * Per-dataset linear probe test accuracy when trained on the top 1 LLM residual stream channel. May be `null`.
 */
export type LLMTop1TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 2 LLM residual stream channels. May be `null`.
 */
export type LLMTop2TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 5 LLM residual stream channels. May be `null`.
 */
export type LLMTop5TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 10 LLM residual stream channels. May be `null`.
 */
export type LLMTop10TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 20 LLM residual stream channels. May be `null`.
 */
export type LLMTop20TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 50 LLM residual stream channels. May be `null`.
 */
export type LLMTop50TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 100 LLM residual stream channels. May be `null`.
 */
export type LLMTop100TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on all SAE latents. May be `null`.
 */
export type SAETestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 1 SAE latent. May be `null`.
 */
export type SAETop1TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 2 SAE latents. May be `null`.
 */
export type SAETop2TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 5 SAE latents. May be `null`.
 */
export type SAETop5TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 10 SAE latents. May be `null`.
 */
export type SAETop10TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 20 SAE latents. May be `null`.
 */
export type SAETop20TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 50 SAE latents. May be `null`.
 */
export type SAETop50TestAccuracy1 = number | null;
/**
 * Per-dataset linear probe test accuracy when trained on the top 100 SAE latents. May be `null`.
 */
export type SAETop100TestAccuracy1 = number | null;
/**
 * Per-dataset sparse probing results. Each element holds the stats for one dataset.
 */
export type PerDatasetSparseProbingResults = SparseProbingResultDetail[];
/**
 * The commit hash of the SAE Bench code that ran the evaluation.
 */
export type SAEBenchCommitHash = string;
/**
 * The ID of the SAE in SAE Lens. May be `null`.
 */
export type SAELensID = string | null;
/**
 * The release ID of the SAE in SAE Lens. May be `null`.
 */
export type SAELensReleaseID = string | null;
/**
 * The version of SAE Lens that ran the evaluation. May be `null`.
 */
export type SAELensVersion = string | null;
/**
 * Configuration dictionary of the SAE (either custom or from SAE Lens) that ran the evaluation.
 * It should match the SAE Lens config schema. Keys are arbitrary strings with untyped (`unknown`) values, and the whole dictionary may be `null`.
 */
export type SAEConfigDict = Record<string, unknown> | null;

/**
 * Top-level record of a sparse probing evaluation.
 *
 * The evaluation uses SAEs to probe for supervised concepts in LLMs: sparse probing with the
 * top K SAE latents, covering over 30 different classes across 5 datasets.
 */
export interface SparseProbing {
  /** Optional evaluation type identifier. */
  eval_type_id?: EvalTypeId;
  /** The configuration of the evaluation. */
  eval_config: EvalConfigType;
  /** A unique UUID identifying this specific eval run. */
  eval_id: ID;
  /** The datetime of the evaluation in epoch milliseconds. */
  datetime_epoch_millis: DateTimeEpochMs;
  /** The metrics of the evaluation, organized by category. */
  eval_result_metrics: ResultMetricsCategorized;
  /** Optional list of per-dataset sparse probing results. */
  eval_result_details?: PerDatasetSparseProbingResults;
  /** The commit hash of the SAE Bench that ran the evaluation. */
  sae_bench_commit_hash: SAEBenchCommitHash;
  /** The ID of the SAE in SAE Lens. */
  sae_lens_id: SAELensID;
  /** The release ID of the SAE in SAE Lens. */
  sae_lens_release_id: SAELensReleaseID;
  /** The version of SAE Lens that ran the evaluation. */
  sae_lens_version: SAELensVersion;
  /** The configuration of the SAE (custom or from SAE Lens) that ran the evaluation. */
  sae_cfg_dict: SAEConfigDict;
  /** Optional free-form results; this type imposes no structure on them. */
  eval_result_unstructured?: unknown;
  /** Any additional property is accepted, with an untyped value. */
  [key: string]: unknown;
}
/**
 * Configuration of the evaluation. Every named setting is optional, and additional settings are accepted.
 */
export interface EvalConfigType {
  /** Random seed. */
  random_seed?: RandomSeed;
  /** List of dataset names. */
  dataset_names?: DatasetNames;
  /** Probe train set size. */
  probe_train_set_size?: ProbeTrainSetSize;
  /** Probe test set size. */
  probe_test_set_size?: ProbeTestSetSize;
  /** Maximum length of each input to the LLM; longer inputs are truncated, keeping only the beginning. */
  context_length?: LLMContextLength;
  /** SAE batch size, inference only. */
  sae_batch_size?: SAEBatchSize;
  /** LLM batch size; set by default in the main script or with a command line argument. */
  llm_batch_size?: LLMBatchSize;
  /** LLM data type; set by default in the main script or with a command line argument. */
  llm_dtype?: LLMDataType;
  /** Model name; must be set with a command line argument. */
  model_name?: ModelName;
  /** Values of K (number of SAE features or residual stream channels the linear probe is trained on) to iterate over. */
  k_values?: KValues;
  /** Lower GPU memory usage by doing more computation on the CPU. */
  lower_vram_usage?: LowerMemoryUsage;
  /** Any additional property is accepted, with an untyped value. */
  [key: string]: unknown;
}
/**
 * Evaluation metrics grouped by category. The `llm` and `sae` categories are required;
 * further categories of your own, with their metrics, can be added as extra properties.
 */
export interface ResultMetricsCategorized {
  /** LLM metrics. */
  llm: LLM;
  /** SAE metrics. */
  sae: SAE;
  /** Any additional category is accepted, with an untyped value. */
  [key: string]: unknown;
}
/**
 * LLM metrics: linear probe accuracies obtained from the LLM residual stream.
 * Only `llm_test_accuracy` is required; the top-K entries are optional.
 */
export interface LLM {
  /** Linear probe accuracy when training on the full LLM residual stream. */
  llm_test_accuracy: LLMTestAccuracy;
  /** Linear probe accuracy when trained on the top 1 LLM residual stream channel. */
  llm_top_1_test_accuracy?: LLMTop1TestAccuracy;
  /** Linear probe accuracy when trained on the top 2 LLM residual stream channels. */
  llm_top_2_test_accuracy?: LLMTop2TestAccuracy;
  /** Linear probe accuracy when trained on the top 5 LLM residual stream channels. */
  llm_top_5_test_accuracy?: LLMTop5TestAccuracy;
  /** Linear probe accuracy when trained on the top 10 LLM residual stream channels. */
  llm_top_10_test_accuracy?: LLMTop10TestAccuracy;
  /** Linear probe accuracy when trained on the top 20 LLM residual stream channels. */
  llm_top_20_test_accuracy?: LLMTop20TestAccuracy;
  /** Linear probe accuracy when trained on the top 50 LLM residual stream channels. */
  llm_top_50_test_accuracy?: LLMTop50TestAccuracy;
  /** Linear probe accuracy when trained on the top 100 LLM residual stream channels. */
  llm_top_100_test_accuracy?: LLMTop100TestAccuracy;
  /** Any additional metric is accepted, with an untyped value. */
  [key: string]: unknown;
}
/**
 * SAE metrics: linear probe accuracies obtained from SAE latents. Every named metric is optional.
 */
export interface SAE {
  /** Linear probe accuracy when trained on all SAE latents. */
  sae_test_accuracy?: SAETestAccuracy;
  /** Linear probe accuracy when trained on the top 1 SAE latent. */
  sae_top_1_test_accuracy?: SAETop1TestAccuracy;
  /** Linear probe accuracy when trained on the top 2 SAE latents. */
  sae_top_2_test_accuracy?: SAETop2TestAccuracy;
  /** Linear probe accuracy when trained on the top 5 SAE latents. */
  sae_top_5_test_accuracy?: SAETop5TestAccuracy;
  /** Linear probe accuracy when trained on the top 10 SAE latents. */
  sae_top_10_test_accuracy?: SAETop10TestAccuracy;
  /** Linear probe accuracy when trained on the top 20 SAE latents. */
  sae_top_20_test_accuracy?: SAETop20TestAccuracy;
  /** Linear probe accuracy when trained on the top 50 SAE latents. */
  sae_top_50_test_accuracy?: SAETop50TestAccuracy;
  /** Linear probe accuracy when trained on the top 100 SAE latents. */
  sae_top_100_test_accuracy?: SAETop100TestAccuracy;
  /** Any additional metric is accepted, with an untyped value. */
  [key: string]: unknown;
}
/**
 * Sparse probing stats for a single dataset; the element type of the per-dataset results list.
 * `dataset_name` and `llm_test_accuracy` are required, all other named metrics are optional.
 */
export interface SparseProbingResultDetail {
  /** Dataset name. */
  dataset_name: DatasetName;
  /** Linear probe accuracy when trained on all LLM residual stream channels. */
  llm_test_accuracy: LLMTestAccuracy1;
  /** Linear probe accuracy when trained on the top 1 LLM residual stream channel. */
  llm_top_1_test_accuracy?: LLMTop1TestAccuracy1;
  /** Linear probe accuracy when trained on the top 2 LLM residual stream channels. */
  llm_top_2_test_accuracy?: LLMTop2TestAccuracy1;
  /** Linear probe accuracy when trained on the top 5 LLM residual stream channels. */
  llm_top_5_test_accuracy?: LLMTop5TestAccuracy1;
  /** Linear probe accuracy when trained on the top 10 LLM residual stream channels. */
  llm_top_10_test_accuracy?: LLMTop10TestAccuracy1;
  /** Linear probe accuracy when trained on the top 20 LLM residual stream channels. */
  llm_top_20_test_accuracy?: LLMTop20TestAccuracy1;
  /** Linear probe accuracy when trained on the top 50 LLM residual stream channels. */
  llm_top_50_test_accuracy?: LLMTop50TestAccuracy1;
  /** Linear probe accuracy when trained on the top 100 LLM residual stream channels. */
  llm_top_100_test_accuracy?: LLMTop100TestAccuracy1;
  /** Linear probe accuracy when trained on all SAE latents. */
  sae_test_accuracy?: SAETestAccuracy1;
  /** Linear probe accuracy when trained on the top 1 SAE latent. */
  sae_top_1_test_accuracy?: SAETop1TestAccuracy1;
  /** Linear probe accuracy when trained on the top 2 SAE latents. */
  sae_top_2_test_accuracy?: SAETop2TestAccuracy1;
  /** Linear probe accuracy when trained on the top 5 SAE latents. */
  sae_top_5_test_accuracy?: SAETop5TestAccuracy1;
  /** Linear probe accuracy when trained on the top 10 SAE latents. */
  sae_top_10_test_accuracy?: SAETop10TestAccuracy1;
  /** Linear probe accuracy when trained on the top 20 SAE latents. */
  sae_top_20_test_accuracy?: SAETop20TestAccuracy1;
  /** Linear probe accuracy when trained on the top 50 SAE latents. */
  sae_top_50_test_accuracy?: SAETop50TestAccuracy1;
  /** Linear probe accuracy when trained on the top 100 SAE latents. */
  sae_top_100_test_accuracy?: SAETop100TestAccuracy1;
  /** Any additional property is accepted, with an untyped value. */
  [key: string]: unknown;
}
