from metrics.commonsense.evaluate import evaluate as commonsense_evaluate
from utils.utils import load_json

if __name__ == "__main__":
    # Script entry point: score the stored generations for one dataset and
    # print the resulting accuracy.
    dataset = 'obqa'
    pred_file = f"ckpt/sft/qwen/{dataset}/eval_results/generated_predictions.jsonl"

    # NOTE(review): the path ends in .jsonl but is read through load_json;
    # confirm that helper handles line-delimited records.
    records = load_json(pred_file)

    # Gather one column per field, predictions first and then labels, each
    # built by its own pass over the loaded records.
    columns = {
        field: [record[field] for record in records]
        for field in ('predict', 'label')
    }

    acc = commonsense_evaluate(columns['predict'], columns['label'], dataset)
    print(f"Accuracy: {acc}")