# Group definition: declares a group named `cmmlu` whose members are the
# subtasks listed under `task`.
group: cmmlu
# Member subtasks: 67 entries, one per subject, kept in alphabetical order.
# None of these task names is defined in this file.
# NOTE(review): presumably each name is resolved by the consuming harness
# against a separately defined task config — confirm every entry exists there.
task:
  - cmmlu_agronomy
  - cmmlu_anatomy
  - cmmlu_ancient_chinese
  - cmmlu_arts
  - cmmlu_astronomy
  - cmmlu_business_ethics
  - cmmlu_chinese_civil_service_exam
  - cmmlu_chinese_driving_rule
  - cmmlu_chinese_food_culture
  - cmmlu_chinese_foreign_policy
  - cmmlu_chinese_history
  - cmmlu_chinese_literature
  - cmmlu_chinese_teacher_qualification
  - cmmlu_clinical_knowledge
  - cmmlu_college_actuarial_science
  - cmmlu_college_education
  - cmmlu_college_engineering_hydrology
  - cmmlu_college_law
  - cmmlu_college_mathematics
  - cmmlu_college_medical_statistics
  - cmmlu_college_medicine
  - cmmlu_computer_science
  - cmmlu_computer_security
  - cmmlu_conceptual_physics
  - cmmlu_construction_project_management
  - cmmlu_economics
  - cmmlu_education
  - cmmlu_electrical_engineering
  - cmmlu_elementary_chinese
  - cmmlu_elementary_commonsense
  - cmmlu_elementary_information_and_technology
  - cmmlu_elementary_mathematics
  - cmmlu_ethnology
  - cmmlu_food_science
  - cmmlu_genetics
  - cmmlu_global_facts
  - cmmlu_high_school_biology
  - cmmlu_high_school_chemistry
  - cmmlu_high_school_geography
  - cmmlu_high_school_mathematics
  - cmmlu_high_school_physics
  - cmmlu_high_school_politics
  - cmmlu_human_sexuality
  - cmmlu_international_law
  - cmmlu_journalism
  - cmmlu_jurisprudence
  - cmmlu_legal_and_moral_basis
  - cmmlu_logical
  - cmmlu_machine_learning
  - cmmlu_management
  - cmmlu_marketing
  - cmmlu_marxist_theory
  - cmmlu_modern_chinese
  - cmmlu_nutrition
  - cmmlu_philosophy
  - cmmlu_professional_accounting
  - cmmlu_professional_law
  - cmmlu_professional_medicine
  - cmmlu_professional_psychology
  - cmmlu_public_relations
  - cmmlu_security_study
  - cmmlu_sociology
  - cmmlu_sports_science
  - cmmlu_traditional_chinese_medicine
  - cmmlu_virology
  - cmmlu_world_history
  - cmmlu_world_religions
# Aggregated metrics: one entry per metric (`acc` and `acc_norm`), each using
# `mean` aggregation with `weight_by_size` enabled.
# NOTE(review): `weight_by_size: true` presumably weights each subtask's score
# by its number of examples (micro-average) instead of counting every subtask
# equally — confirm against the consuming harness's documentation.
aggregate_metric_list:
  - aggregation: mean
    metric: acc
    weight_by_size: true
  - aggregation: mean
    metric: acc_norm
    weight_by_size: true
# Metadata for this config.
# NOTE(review): the unquoted `1.0` is parsed as a float, not a string; confirm
# which type the consumer expects before quoting or otherwise changing it.
metadata:
  version: 1.0
