import numpy as np
import json
from sentence_transformers import SentenceTransformer

# Keyword search over precomputed article embeddings: encode a query with the
# model that produced the stored vectors, score every article by dot product,
# and print the best matches.

# Identifier (hub name or local directory) of the model that produced
# article_embeddings.npy. It is empty in this file and must be filled in
# before running: an empty identifier gives the library nothing to load, and
# query/article vectors are only comparable when they come from the same model.
MODEL_NAME = ''
if not MODEL_NAME:
    raise ValueError(
        "MODEL_NAME is empty; set it to the SentenceTransformer model that "
        "was used to build article_embeddings.npy"
    )

# Load article vectors and the reference (path) belonging to each row.
embeddings = np.load('article_embeddings.npy')  # expected shape: (N, D)
with open('article_refs.json', 'r', encoding='utf-8') as f:
    file_refs = json.load(f)

# Row i of the matrix is reported as file_refs[i], so the two inputs must line
# up; otherwise results would be mislabeled or fail later with an IndexError.
if embeddings.ndim != 2:
    raise ValueError(
        f"expected a 2-D embedding matrix, got shape {embeddings.shape}"
    )
if len(file_refs) != embeddings.shape[0]:
    raise ValueError(
        f"article_refs.json has {len(file_refs)} entries but "
        f"article_embeddings.npy has {embeddings.shape[0]} rows"
    )

# Load the same model that was used to embed the articles.
model = SentenceTransformer(MODEL_NAME)

# Your keywords
query = "benchmark dataset"  # Can be replaced with any keywords

# Keyword vectorization; a one-element batch yields shape (1, D).
query_emb = model.encode([query], normalize_embeddings=True)

# A dimension mismatch means the query was embedded by a different model than
# the articles; report that directly instead of a generic shape error.
if query_emb.shape[1] != embeddings.shape[1]:
    raise ValueError(
        f"query embedding has dimension {query_emb.shape[1]} but stored "
        f"embeddings have dimension {embeddings.shape[1]}; "
        "MODEL_NAME must match the model used to build them"
    )

# Calculate relevance as a dot product with the unit-length query vector.
# NOTE(review): this equals cosine similarity only if the stored rows are also
# unit-length -- confirm they were saved with normalize_embeddings=True.
scores = np.dot(embeddings, query_emb[0])  # shape: (N,)

# Sort descending and output the Top-K (slicing tolerates N < top_k).
top_k = 10
top_indices = np.argsort(scores)[::-1][:top_k]
for idx in top_indices:
    print(f"{file_refs[idx]} relevance: {scores[idx]:.4f}")