import h5py
import numpy as np
from tqdm import tqdm 

def _unit_rows(vectors, eps=1e-10):
    """Return *vectors* with every row divided by its L2 norm (plus *eps*)."""
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    # eps keeps an all-zero row from triggering a division by zero; such a
    # row simply stays all-zero.
    return vectors / (norms + eps)


def change_dataset(file_name, out_file_name, *, k=100):
    """Rewrite an HDF5 dataset with unit-norm vectors and angular neighbors.

    Reads the ``'train'`` and ``'test'`` datasets from *file_name*, scales
    every row to unit L2 norm, and for each test row finds the ``k`` training
    rows with the smallest angular distance (``arccos`` of the dot product of
    the normalized vectors, in radians).

    The output file *out_file_name* is opened in ``'w'`` mode and receives
    four datasets:

    * ``'train'``     -- normalized training vectors
    * ``'test'``      -- normalized test vectors
    * ``'neighbors'`` -- int32 array ``(n_test, k)`` of training-row indices,
      nearest first
    * ``'distances'`` -- float array ``(n_test, k)`` of the matching angular
      distances

    Args:
        file_name: Path of the input HDF5 file; must contain 2-D ``'train'``
            and ``'test'`` datasets.
        out_file_name: Path of the HDF5 file to write.
        k: Number of neighbors to keep per test vector (default 100, the
            previously hard-coded value). If the training set has fewer than
            ``k`` rows, all of them are kept and the neighbor arrays are
            narrower accordingly.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got {!r}".format(k))

    with h5py.File(file_name, 'r') as file:
        # Slice with [:] so each dataset is read from disk exactly once and we
        # work on plain ndarrays; the input file can then be closed before
        # the (potentially long) neighbor search starts.
        dataset_train = _unit_rows(file['train'][:])
        dataset_test = _unit_rows(file['test'][:])

    test_size = len(dataset_test)
    # Never ask for more neighbors than there are training vectors; otherwise
    # the row assignment below would fail with a shape mismatch.
    k = min(k, len(dataset_train))

    neighbors_indices = np.zeros((test_size, k), dtype=np.int32)
    neighbors_distances = np.zeros((test_size, k))

    for i in tqdm(range(test_size), desc=""):
        similarity = np.dot(dataset_train, dataset_test[i])
        # Rounding can push a cosine marginally outside [-1, 1], which would
        # make arccos return NaN.
        similarity = np.clip(similarity, -1.0, 1.0)
        angular = np.arccos(similarity)

        top_indices = np.argsort(angular)[:k]
        neighbors_indices[i] = top_indices
        neighbors_distances[i] = angular[top_indices]

    with h5py.File(out_file_name, 'w') as out_file:
        out_file.create_dataset('train', data=dataset_train)
        out_file.create_dataset('test', data=dataset_test)
        out_file.create_dataset('neighbors', data=neighbors_indices)
        out_file.create_dataset('distances', data=neighbors_distances)
        
if __name__ == '__main__':
    # Placeholder paths: set these to the source HDF5 file and the
    # destination HDF5 file before running the script.
    input_file, output_file = "", ""
    change_dataset(file_name=input_file, out_file_name=output_file)