import os 

# Maps a display name for each model to the identifier string used for it
# elsewhere in this module (e.g. the ``model=`` field in ``meta_predictions``
# uses the "GPT-4o" identifier).
#
# Insertion order matters: ``model_list`` below is derived from it.
#
# NOTE(review): keys are looked up verbatim, so their inconsistent spelling
# ("LLaMa3-70b" vs. "LLama3-8b") and the trailing space in "GPT-4 Turbo " are
# preserved deliberately -- confirm with callers before normalizing them.
model_name_transfer = {
    "Claude 3 Opus": "anthropic_claude-3-opus-20240229",
    "Claude 3.5 Sonnet": "anthropic_claude-3-5-sonnet-20240620",
    "GPT-4": "openai_gpt-4-0613",
    "GPT-4o": "openai_gpt-4o-2024-05-13",
    "Gemini 1.5 Pro": "google_gemini-1.5-pro-001",
    "Gemini 1.5 Pro (preview)": "google_gemini-1.5-pro-preview-0409",
    "Qwen2 Instruct-72B": "qwen_qwen2-72b-instruct",
    "LLaMa3-70b": "meta_llama-3-70b",
    "GPT-4 Turbo (preview)": "openai_gpt-4-1106-preview",
    "GPT-4 Turbo ": "openai_gpt-4-turbo-2024-04-09",  # trailing space is intentional (see note above)
    "Yi-Large": "01-ai_yi-large-preview",
    "palmyra": "writer_palmyra-x-v3",
    "palm_unicorn": "google_text-unicorn@001",
    "gemini1.5flashpreview": "google_gemini-1.5-flash-preview-0514",
    "qwen1.5_72b": "qwen_qwen1.5-72b",
    "qwen1.5_110b_chat": "qwen_qwen1.5-110b-chat",
    "mixtral-8x22b": "mistralai_mixtral-8x22b",
    "gemini1.5flash": "google_gemini-1.5-flash-001",
    "yi34b": "01-ai_yi-34b",
    "Claude 3 Sonnet (20240229)": "anthropic_claude-3-sonnet-20240229",
    "LLama2-7b": "meta_llama-2-7b",
    "LLama2-13b": "meta_llama-2-13b",
    "LLama2-70b": "meta_llama-2-70b",
    "LLama3-8b": "meta_llama-3-8b",
    "mistral7b01": "mistralai_mistral-7b-v0.1",
    "mixtral8x7b-32": "mistralai_mixtral-8x7b-32kseqlen",
    "gemma7b": "google_gemma-7b",
    "olmo7b": "allenai_olmo-7b",
    "olmo1.7-7b": "allenai_olmo-1.7-7b",
    "qwen1.5-7b": "qwen_qwen1.5-7b",
    "LLama3.1-405b": "llama3.1_det",
}

# Every model identifier, in the same order as the mapping above.
model_list = [*model_name_transfer.values()]

# Root data directory, given as a relative path; other paths in this module
# (e.g. ``dir_predictions``) are built underneath it.
dir_data = "mmlu_data"


# Newline-separated run specifications, one line per MMLU subject (57 lines,
# no trailing newline). Every line has the same shape and differs only in the
# subject name, which appears in both the ``subject=`` and ``groups=mmlu_``
# fields. The model field is fixed to ``openai_gpt-4o-2024-05-13``.
# NOTE(review): presumably consumers substitute other model identifiers into
# these lines -- confirm against the code that reads ``meta_predictions``.
meta_predictions = "\n".join(
    (
        "mmlu:subject={subject},method=multiple_choice_joint,"
        "model=openai_gpt-4o-2024-05-13,eval_split=test,"
        "groups=mmlu_{subject}"
    ).format(subject=subject)
    # Order is significant: it fixes the order of lines in the final string.
    for subject in (
        "abstract_algebra",
        "anatomy",
        "astronomy",
        "business_ethics",
        "clinical_knowledge",
        "college_biology",
        "college_chemistry",
        "college_computer_science",
        "college_mathematics",
        "college_medicine",
        "college_physics",
        "computer_security",
        "conceptual_physics",
        "econometrics",
        "electrical_engineering",
        "elementary_mathematics",
        "formal_logic",
        "global_facts",
        "high_school_biology",
        "high_school_chemistry",
        "high_school_computer_science",
        "high_school_european_history",
        "high_school_geography",
        "high_school_government_and_politics",
        "high_school_macroeconomics",
        "high_school_mathematics",
        "high_school_microeconomics",
        "high_school_physics",
        "high_school_psychology",
        "high_school_statistics",
        "high_school_us_history",
        "high_school_world_history",
        "human_aging",
        "human_sexuality",
        "international_law",
        "jurisprudence",
        "logical_fallacies",
        "machine_learning",
        "management",
        "marketing",
        "medical_genetics",
        "miscellaneous",
        "moral_disputes",
        "moral_scenarios",
        "nutrition",
        "philosophy",
        "prehistory",
        "professional_accounting",
        "professional_law",
        "professional_medicine",
        "professional_psychology",
        "public_relations",
        "security_studies",
        "sociology",
        "us_foreign_policy",
        "virology",
        "world_religions",
    )
)

# Path of the "predictions" subdirectory under ``dir_data``, joined with the
# platform's path separator.
dir_predictions = os.path.join(dir_data, "predictions")