#!/usr/bin/env python
"""
Example script showing how to add EvalSetting and EvalRelt entries to the database.

This demonstrates the process of adding benchmark scores for specific models.
"""

import argparse
import logging
import uuid
from datetime import datetime, timezone
from typing import Dict, Any, Optional

from database.models import EvalSetting, EvalRelt
from database.utils import session_scope, get_or_add_model_by_name


def _to_uuid(value: Any) -> uuid.UUID:
    """Coerce a UUID string (or an existing ``uuid.UUID``) into a ``uuid.UUID``.

    Raises:
        ValueError: If the string form of ``value`` is not a valid UUID.
    """
    if isinstance(value, uuid.UUID):
        return value
    return uuid.UUID(str(value))


def _build_eval_params(benchmark: str) -> Dict[str, Any]:
    """Derive the evaluation-setting parameters from a benchmark name.

    The text before the first underscore is the benchmark type
    (e.g., ``MATH500``); everything after it is the metric
    (e.g., ``accuracy_avg``). A name without an underscore falls back to the
    ``"accuracy"`` metric.
    """
    benchmark_type, separator, metric = benchmark.partition("_")
    return {
        "benchmark_type": benchmark_type,
        "metric": metric if separator else "accuracy",
        "version": "1.0",
    }


def add_eval_setting(
    name: str,
    parameters: Dict[str, Any],
    eval_version_hash: Optional[str] = None
) -> str:
    """
    Add a new evaluation setting to the database or get an existing one.

    Args:
        name: Name of the evaluation setting
        parameters: Dictionary of parameters for the evaluation
        eval_version_hash: Optional hash of the evaluation code version;
            stored as an empty string when not provided

    Returns:
        UUID string of the created or existing evaluation setting
    """
    with session_scope() as session:
        # Reuse the setting if one with this name is already registered.
        existing_setting = (
            session.query(EvalSetting).filter(EvalSetting.name == name).first()
        )
        if existing_setting:
            logging.info(
                "Using existing evaluation setting: %s (ID: %s)",
                name,
                existing_setting.id,
            )
            return str(existing_setting.id)

        # Determine display order for the new setting.
        display_order = EvalSetting.determine_display_order(session, name)

        new_setting = EvalSetting(
            id=uuid.uuid4(),
            name=name,
            parameters=parameters,
            eval_version_hash=eval_version_hash or "",
            display_order=display_order,
        )
        session.add(new_setting)
        session.commit()

        logging.info(
            "Created new evaluation setting: %s (ID: %s)", name, new_setting.id
        )
        return str(new_setting.id)


def add_eval_relt(
    model_id: str,
    eval_setting_id: str,
    score: float,
    dataset_id: Optional[str] = None,
    created_by: str = "script",
    completions_location: str = "N/A"
) -> str:
    """
    Add a new EvalRelt row (a score for a model under an evaluation setting).

    Args:
        model_id: UUID of the model (string; a ``uuid.UUID`` is also accepted)
        eval_setting_id: UUID of the evaluation setting (string or ``uuid.UUID``)
        score: Numeric score for the evaluation
        dataset_id: Optional UUID of the dataset used for evaluation
        created_by: Name of the creator of this row
        completions_location: Location where completions data is stored

    Returns:
        UUID string of the created EvalRelt row

    Raises:
        ValueError: If any supplied ID is not a valid UUID.
    """
    with session_scope() as session:
        relt_id = uuid.uuid4()
        new_relt = EvalRelt(
            id=relt_id,
            model_id=_to_uuid(model_id),
            eval_setting_id=_to_uuid(eval_setting_id),
            score=score,
            # A missing or empty dataset ID is stored as NULL.
            dataset_id=_to_uuid(dataset_id) if dataset_id else None,
            created_by=created_by,
            creation_time=datetime.now(timezone.utc),
            creation_location="local_script",
            completions_location=completions_location,
        )
        session.add(new_relt)
        session.commit()

        logging.info(
            "Added evaluation relt: %s for model %s, eval setting %s",
            score,
            model_id,
            eval_setting_id,
        )
        return str(relt_id)


def main() -> None:
    """
    Example usage of the functions to add evaluation relts.

    Parses the command line, resolves the model ID via
    ``get_or_add_model_by_name``, gets or creates the evaluation setting named
    after the benchmark, and stores the score as a new EvalRelt row.
    """
    parser = argparse.ArgumentParser(description="Add evaluation relts to the database")
    parser.add_argument("--model", required=True, help="HuggingFace model path or model UUID")
    parser.add_argument("--benchmark", required=True, help="Benchmark name (e.g., 'MATH500_accuracy_avg')")
    parser.add_argument("--score", required=True, type=float, help="Benchmark score (usually 0.0-1.0)")
    parser.add_argument("--dataset", help="Optional dataset UUID")
    parser.add_argument("--created-by", default="script", help="Creator identifier")
    parser.add_argument("--completions-location", default="N/A", help="Location of completions data")

    args = parser.parse_args()

    # Configure logging
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    # Get or register the model in the database
    model_id = get_or_add_model_by_name(args.model)
    logging.info("Using model ID: %s", model_id)

    # Define evaluation setting parameters from the benchmark name
    eval_params = _build_eval_params(args.benchmark)

    # Add or get the evaluation setting
    eval_setting_id = add_eval_setting(
        name=args.benchmark,
        parameters=eval_params,
    )

    # Add the evaluation relt
    relt_id = add_eval_relt(
        model_id=model_id,
        eval_setting_id=eval_setting_id,
        score=args.score,
        dataset_id=args.dataset,
        created_by=args.created_by,
        completions_location=args.completions_location,
    )

    logging.info("Successfully added evaluation relt with ID: %s", relt_id)
    logging.info("Model: %s", args.model)
    logging.info("Benchmark: %s", args.benchmark)
    logging.info("Score: %s", args.score)


if __name__ == "__main__":
    main()