

import torch
import pickle
from transformers import AutoTokenizer, AutoModel

# Put your input data (sentences / paragraphs / articles) in this file, one
# entry per line. The rest of the script was written for the questions.txt
# file, i.e. a corpus of sentences/statements.
#
# The encoding is set explicitly so the text is decoded the same way on every
# platform instead of depending on the locale default. The trailing newline is
# dropped from each entry; lines are otherwise kept as-is (including blank
# ones) so that row i of the output matrix still corresponds to line i here.
with open("put_your_input_data_here.txt", "r", encoding="utf-8") as f:
    sentences = [line.rstrip("\n") for line in f]

# Load the pretrained BERT tokenizer and encoder used to embed each sentence.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Fail early with a clear message rather than an obscure error further down.
if not sentences:
    raise ValueError("no input sentences to embed")

# Pad every sentence to the longest one in the batch, and truncate anything
# longer than the model's maximum input length so that over-long inputs do not
# crash the forward pass.
inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True)

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    outputs = model(**inputs)

# Mean-pool the token vectors of each sentence, counting only real tokens.
# A plain mean over dim=1 would also average the padding positions, making a
# sentence's embedding depend on how long the longest sentence in the batch is.
token_mask = inputs["attention_mask"].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
token_sum = (outputs.last_hidden_state * token_mask).sum(dim=1)
token_count = token_mask.sum(dim=1).clamp(min=1)  # guard against division by zero
sentence_emd = token_sum / token_count

# L2-normalise each embedding (normalize() clamps the norm away from zero), so
# the matrix product below yields pairwise cosine similarities.
a_norm = torch.nn.functional.normalize(sentence_emd, p=2, dim=1)

# res[i][j] is the cosine similarity between sentence i and sentence j.
res = torch.mm(a_norm, a_norm.transpose(0, 1)).tolist()

print(res)

# Persist the similarity matrix in binary form for later programmatic use.
# (The original script re-loaded this file immediately after writing it; that
# round-trip only produced an equal copy of `res`, so it has been dropped.)
with open("output_file", "wb") as f:
    pickle.dump(res, f)

# Also write a human-readable copy: the output matrix will be in output_file.txt
with open("output_file.txt", "w") as f:
    f.write(str(res))