# Subject name -> list of subcategory tags attached to that subject.
# A subject may carry more than one tag (for example a topical tag plus
# "china specific"). Entry order matters: anything that iterates this dict
# inherits the order written here.
subcategories = {
    "agronomy": ["other"],
    "anatomy": ["biology"],
    "ancient_chinese": ["linguistics", "china specific"],
    "arts": ["arts"],
    "astronomy": ["physics"],
    "business_ethics": ["business"],
    "chinese_civil_service_exam": ["politics", "china specific"],
    "chinese_driving_rule": ["other", "china specific"],
    "chinese_food_culture": ["culture", "china specific"],
    "chinese_foreign_policy": ["politics", "china specific"],
    "chinese_history": ["history", "china specific"],
    "chinese_literature": ["literature", "china specific"],
    "chinese_teacher_qualification": ["education", "china specific"],
    "college_actuarial_science": ["math"],
    "college_education": ["education"],
    "college_engineering_hydrology": ["engineering"],
    "college_law": ["law"],
    "college_mathematics": ["math"],
    "college_medical_statistics": ["statistics"],
    "clinical_knowledge": ["other"],
    "college_medicine": ["other"],
    "computer_science": ["computer science"],
    "computer_security": ["other"],
    "conceptual_physics": ["physics"],
    "construction_project_management": ["other", "china specific"],
    "economics": ["economics"],
    "education": ["education"],
    "elementary_chinese": ["linguistics", "china specific"],
    "elementary_commonsense": ["other", "china specific"],
    "elementary_information_and_technology": ["other"],
    "electrical_engineering": ["engineering"],
    "elementary_mathematics": ["math"],
    "ethnology": ["culture", "china specific"],
    "food_science": ["other"],
    "genetics": ["biology"],
    "global_facts": ["global"],
    "high_school_biology": ["biology"],
    "high_school_chemistry": ["chemistry"],
    "high_school_geography": ["geography"],
    "high_school_mathematics": ["math"],
    "high_school_physics": ["physics"],
    "high_school_politics": ["politics", "china specific"],
    "human_sexuality": ["other"],
    "international_law": ["law"],
    "journalism": ["sociology"],
    "jurisprudence": ["law"],
    "legal_and_moral_basis": ["other"],
    "logical": ["philosophy"],
    "machine_learning": ["computer science"],
    "management": ["business"],
    "marketing": ["business"],
    "marxist_theory": ["philosophy"],
    "modern_chinese": ["linguistics", "china specific"],
    "nutrition": ["other"],
    "philosophy": ["philosophy"],
    "professional_accounting": ["business"],
    "professional_law": ["law"],
    "professional_medicine": ["other"],
    "professional_psychology": ["psychology"],
    "public_relations": ["politics"],
    "security_study": ["politics"],
    "sociology": ["culture"],
    "sports_science": ["other"],
    "traditional_chinese_medicine": ["other", "china specific"],
    "virology": ["biology"],
    "world_history": ["history"],
    "world_religions": ["global"],
}

# Broad category name -> the subcategory tags that belong to it.
# The tags here are the same strings used as values in `subcategories`.
categories = {
    "STEM": [
        "physics",
        "chemistry",
        "biology",
        "computer science",
        "math",
        "engineering",
        "statistics",
    ],
    "Humanities": [
        "history",
        "philosophy",
        "law",
        "arts",
        "literature",
        "global",
    ],
    "Social Science": [
        "linguistics",
        "business",
        "politics",
        "culture",
        "economics",
        "geography",
        "psychology",
        "education",
        "sociology",
    ],
    "Other": ["other"],
    "China specific": ["china specific"],
}

# Broad category name -> subjects that carry at least one of that
# category's tags. Subjects appear in `subcategories` order, once per
# matching tag, and keys are created in `categories` order.
category2subject = {}
for category, member_tags in categories.items():
    matched_subjects = [
        subject
        for subject, subject_tags in subcategories.items()
        for tag in subject_tags
        if tag in member_tags
    ]
    # A category with no matching subject gets no key at all.
    if matched_subjects:
        category2subject[category] = matched_subjects

# Subset names are the subject names with a "cmmlu-" prefix: one list per
# broad category, plus one list covering every subject.
_cmmlu_humanities = ["cmmlu-" + name for name in category2subject["Humanities"]]
_cmmlu_stem = ["cmmlu-" + name for name in category2subject["STEM"]]
_cmmlu_social_science = ["cmmlu-" + name for name in category2subject["Social Science"]]
_cmmlu_other = ["cmmlu-" + name for name in category2subject["Other"]]
_cmmlu_china_specific = ["cmmlu-" + name for name in category2subject["China specific"]]
_cmmlu_all = ["cmmlu-" + name for name in subcategories]

# Each group pairs a group name with the subsets it covers; the final
# "cmmlu" group spans every subject.
cmmlu_summary_groups = [
    {"name": "cmmlu-humanities", "subsets": _cmmlu_humanities},
    {"name": "cmmlu-stem", "subsets": _cmmlu_stem},
    {"name": "cmmlu-social-science", "subsets": _cmmlu_social_science},
    {"name": "cmmlu-other", "subsets": _cmmlu_other},
    {"name": "cmmlu-china-specific", "subsets": _cmmlu_china_specific},
    {"name": "cmmlu", "subsets": _cmmlu_all},
]
