
import argparse
import os
import time
import mteb
from sentence_transformers import SentenceTransformer

def run_mteb_evaluation(model_name: str) -> None:
    """Evaluate a SentenceTransformer model on a fixed list of MTEB tasks.

    Loads ``model_name`` with ``SentenceTransformer``, selects the clustering
    and retrieval tasks named below (filtered with ``languages=["eng"]``),
    runs them through ``mteb.MTEB`` and writes a short timing log to
    ``results/evaluation_log.txt``.

    Args:
        model_name: Model identifier or path handed to ``SentenceTransformer``.

    Returns:
        None. The evaluation output is written under the ``results`` folder
        by ``evaluation.run``; only the timing log is written here.
    """
    # Wall-clock timestamps: they are both printed in the log and used to
    # compute the total duration (model loading included).
    start_time = time.time()

    model = SentenceTransformer(model_name)

    dataset_names = [
        # Clustering
        "ArXivHierarchicalClusteringP2P",
        "BiorxivClusteringP2P.v2",
        "MedrxivClusteringS2S.v2",
        "StackExchangeClustering.v2",
        "TwentyNewsgroupsClustering.v2",
        # Retrieval
        "CodeFeedbackMT",
        "ArguAna",
        "CQADupstackEnglishRetrieval",
        "CQADupstackPhysicsRetrieval",
        "DBPedia",
        "DBPediaHardNegatives",
        "FEVER",
        "HotpotQAHardNegatives",
        "MSMARCO",
        "MSMARCOHardNegatives",
        "MSMARCOv2",
        "QuoraRetrieval",
        "SpartQA",
        "TopiOCQA",
        "MultiLongDocRetrieval",
        "WikipediaRetrievalMultilingual",
        "XQuADRetrieval",
    ]

    # Tasks are selected explicitly by name and restricted to English.
    tasks = mteb.get_tasks(languages=["eng"], tasks=dataset_names)
    evaluation = mteb.MTEB(tasks=tasks)

    # All models share the same top-level output folder.
    output_folder = "results"
    evaluation.run(model, output_folder=output_folder)

    print(f"Results saved to: {output_folder}")

    end_time = time.time()
    duration = end_time - start_time

    # Do not rely on evaluation.run having created the folder: make sure it
    # exists so that writing the log cannot fail with FileNotFoundError.
    os.makedirs(output_folder, exist_ok=True)

    log_path = os.path.join(output_folder, "evaluation_log.txt")
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    started = time.strftime(timestamp_format, time.localtime(start_time))
    finished = time.strftime(timestamp_format, time.localtime(end_time))

    # "w" mode: the log is overwritten on every run. An explicit encoding
    # keeps the file independent of the platform's default locale.
    with open(log_path, "w", encoding="utf-8") as log_file:
        log_file.write(f"Model evaluated: {model_name}\n")
        log_file.write(f"Start time: {started}\n")
        log_file.write(f"End time: {finished}\n")
        log_file.write(f"Total duration: {duration:.2f} seconds\n")

if __name__ == "__main__":
    # The model name is an optional positional argument. When it is omitted
    # the script falls back to the previously hard-coded model, so running
    # it without arguments evaluates the same model as before.
    parser = argparse.ArgumentParser(
        description="Run MTEB evaluation for a specified model."
    )
    parser.add_argument(
        "model_name",
        type=str,
        nargs="?",
        default="reaganjlee/baai-truncate-finetune",
        help="The name of the model to evaluate.",
    )
    args = parser.parse_args()

    # Run the evaluation with the selected model
    run_mteb_evaluation(args.model_name)
