/* eslint-disable */
/**
 * This file was automatically generated by json-schema-to-typescript.
 * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
 * and run json-schema-to-typescript to regenerate this file.
 */

/**
 * Identifier of the evaluation type. Used by the optional `RAVEL.eval_type_id` field.
 */
export type EvalTypeId = string;
/**
 * Number of entities in the dataset, filtered by prediction accuracy over attributes / templates.
 *
 * Type of the optional `EvalConfigType.top_n_entities` field.
 */
export type NumberOfDistinctEntitiesInTheDataset = number;
/**
 * Number of templates in the dataset, filtered by prediction accuracy over entities.
 *
 * Type of the optional `EvalConfigType.top_n_templates` field.
 */
export type NumberOfDistinctTemplatesInTheDataset = number;
/**
 * Downsample the full dataset to this size.
 *
 * Type of the optional `EvalConfigType.full_dataset_downsample` field. `null` is an
 * accepted value. NOTE(review): presumably `null` means "do not downsample" — confirm
 * against the source schema.
 */
export type FullDatasetDownsample = number | null;
/**
 * Number of pairs per attribute.
 *
 * Type of the optional `EvalConfigType.num_pairs_per_attribute` field.
 */
export type NumberOfPairsPerAttribute = number;
/**
 * Fraction of dataset to use for training.
 *
 * Type of the optional `EvalConfigType.train_test_split` field. This is a plain
 * `number`; no range is enforced by the type.
 */
export type TrainTestSplit = number;
/**
 * Force recomputation of the dataset, i.e. generating model predictions for attribute values, evaluating, and downsampling.
 *
 * Type of the optional `EvalConfigType.force_dataset_recompute` field.
 */
export type ForceDatasetRecompute = boolean;
/**
 * Model name.
 *
 * Type of the optional `EvalConfigType.model_name` field.
 */
export type ModelName = string;
/**
 * LLM data type, carried as a string.
 *
 * Type of the optional `EvalConfigType.llm_dtype` field. The set of accepted
 * strings is not constrained by this type.
 */
export type LLMDataType = string;
/**
 * LLM batch size, inference only.
 *
 * Type of the optional `EvalConfigType.llm_batch_size` field.
 */
export type LLMBatchSize = number;
/**
 * Learning rate for the MDBM.
 *
 * Type of the optional `EvalConfigType.learning_rate` field.
 */
export type LearningRate = number;
/**
 * Number of training epochs.
 *
 * Type of the optional `EvalConfigType.num_epochs` field.
 */
export type NumberOfEpochs = number;
/**
 * If `true`, we completely ignore the SAE and train an MDAS instead.
 *
 * Type of the optional `EvalConfigType.train_mdas` field.
 */
export type TrainMDAS = boolean;
/**
 * Number of tokens to generate for each intervention. 8 was used in the RAVEL paper.
 *
 * Type of the optional `EvalConfigType.n_generated_tokens` field.
 */
export type NumberOfGeneratedTokens = number;
/**
 * Random seed.
 *
 * Type of the optional `EvalConfigType.random_seed` field.
 */
export type RandomSeed = number;
/**
 * Directory to save artifacts.
 *
 * Type of the optional `EvalConfigType.artifact_dir` field.
 */
export type ArtifactDirectory = string;
/**
 * A unique UUID identifying this specific eval run.
 *
 * Type of the required `RAVEL.eval_id` field. This is a plain `string`; the UUID
 * format is not enforced by the type.
 */
export type ID = string;
/**
 * The datetime of the evaluation in epoch milliseconds.
 *
 * Type of the required `RAVEL.datetime_epoch_millis` field.
 */
export type DateTimeEpochMs = number;
/**
 * Mean of cause and isolation scores across RAVEL datasets.
 *
 * Type of the required `RAVEL1.disentanglement_score` field.
 */
export type DisentanglementScore = number;
/**
 * Cause score: Patching attribute-related SAE latents. High cause accuracy indicates that the SAE latents are related to the attribute.
 *
 * Type of the required `RAVEL1.cause_score` field.
 */
export type CauseScore = number;
/**
 * Isolation score: Patching SAE latents related to another attribute. High isolation accuracy indicates that latents related to another attribute are not related to this attribute.
 *
 * Type of the required `RAVEL1.isolation_score` field.
 */
export type IsolationScore = number;
/**
 * Optional. The details of the evaluation. A list of objects that stores nested or more detailed data, such as details about the absorption of each letter.
 *
 * Type of the optional `RAVEL.eval_result_details` field; each element is a
 * `BaseResultDetail`.
 * NOTE(review): the "absorption of each letter" example does not mention RAVEL and
 * looks carried over from a generic description — confirm and adjust in the source schema.
 */
export type ResultDetails = BaseResultDetail[];
/**
 * The commit hash of the SAE Bench that ran the evaluation.
 *
 * Type of the required `RAVEL.sae_bench_commit_hash` field.
 */
export type SAEBenchCommitHash = string;
/**
 * The ID of the SAE in SAE Lens.
 *
 * Type of the required `RAVEL.sae_lens_id` field; the field must be present but
 * may hold `null`.
 */
export type SAELensID = string | null;
/**
 * The release ID of the SAE in SAE Lens.
 *
 * Type of the required `RAVEL.sae_lens_release_id` field; the field must be present
 * but may hold `null`.
 */
export type SAELensReleaseID = string | null;
/**
 * The version of SAE Lens that ran the evaluation.
 *
 * Type of the required `RAVEL.sae_lens_version` field; the field must be present but
 * may hold `null`.
 */
export type SAELensVersion = string | null;
/**
 * Configuration of the SAE (custom or from SAE Lens) that ran the evaluation; it
 * should match the SAE Lens config schema.
 *
 * Either `null` or an object with arbitrary string keys whose values are `unknown`,
 * so callers must narrow each value before using it.
 */
export type SAEConfigDict = Record<string, unknown> | null;

/**
 * An evaluation using SAEs for targeted modification of language model output. We leverage the RAVEL dataset of entity-attribute pairs. After filtering for known pairs, we identify attribute-related SAE latents and determine the effect on model predictions with activation patching experiments.
 *
 * Top-level record for one eval run. `eval_type_id`, `eval_result_details` and
 * `eval_result_unstructured` are optional; every other named field is required.
 * The `sae_lens_*` fields and `sae_cfg_dict` must be present but accept `null`.
 * The index signature admits additional keys, typed as `unknown`.
 */
export interface RAVEL {
  eval_type_id?: EvalTypeId;
  eval_config: EvalConfigType;
  eval_id: ID;
  datetime_epoch_millis: DateTimeEpochMs;
  eval_result_metrics: ResultMetricsCategorized;
  eval_result_details?: ResultDetails;
  sae_bench_commit_hash: SAEBenchCommitHash;
  sae_lens_id: SAELensID;
  sae_lens_release_id: SAELensReleaseID;
  sae_lens_version: SAELensVersion;
  sae_cfg_dict: SAEConfigDict;
  eval_result_unstructured?: unknown;
  [k: string]: unknown;
}
/**
 * The configuration of the evaluation.
 *
 * Type of the required `RAVEL.eval_config` field. Every named field is optional, so
 * an empty object satisfies this type; the type does not record what value applies
 * when a field is omitted. The index signature admits additional keys, typed as
 * `unknown`.
 */
export interface EvalConfigType {
  entity_attribute_selection?: SelectionOfEntityAndAttributeClasses;
  top_n_entities?: NumberOfDistinctEntitiesInTheDataset;
  top_n_templates?: NumberOfDistinctTemplatesInTheDataset;
  full_dataset_downsample?: FullDatasetDownsample;
  num_pairs_per_attribute?: NumberOfPairsPerAttribute;
  train_test_split?: TrainTestSplit;
  force_dataset_recompute?: ForceDatasetRecompute;
  model_name?: ModelName;
  llm_dtype?: LLMDataType;
  llm_batch_size?: LLMBatchSize;
  learning_rate?: LearningRate;
  num_epochs?: NumberOfEpochs;
  train_mdas?: TrainMDAS;
  n_generated_tokens?: NumberOfGeneratedTokens;
  random_seed?: RandomSeed;
  artifact_dir?: ArtifactDirectory;
  [k: string]: unknown;
}
/**
 * Subset of the RAVEL dataset to be evaluated. Each key is an entity class, and the value is a list of at least two attribute classes.
 *
 * Type of the optional `EvalConfigType.entity_attribute_selection` field. The
 * "at least two" requirement is not expressed by `string[]`, so it is not checked
 * by the compiler.
 */
export interface SelectionOfEntityAndAttributeClasses {
  [k: string]: string[];
}
/**
 * The metrics of the evaluation, organized by category. Define your own categories and the metrics that go inside them.
 *
 * Type of the required `RAVEL.eval_result_metrics` field. The `ravel` category is
 * required; any other category key is admitted by the index signature and typed as
 * `unknown`.
 */
export interface ResultMetricsCategorized {
  ravel: RAVEL1;
  [k: string]: unknown;
}
/**
 * RAVEL metrics.
 *
 * Type of the required `ResultMetricsCategorized.ravel` field. All three scores are
 * required numbers; additional keys are admitted by the index signature and typed as
 * `unknown`.
 * NOTE(review): the `1` suffix presumably comes from the generator avoiding a clash
 * with the top-level `RAVEL` interface — confirm.
 */
export interface RAVEL1 {
  disentanglement_score: DisentanglementScore;
  cause_score: CauseScore;
  isolation_score: IsolationScore;
  [k: string]: unknown;
}
/**
 * Element type of `ResultDetails`. Declares no named properties: any string key is
 * allowed and its value is `unknown`, so consumers must narrow before use.
 */
export interface BaseResultDetail {
  [k: string]: unknown;
}
