import numpy as np
import os

# Configuration of the synthetic clustered dataset.
num_clusters = 10                # number of cluster centers
d = 64                           # dimensionality of every vector
num_vectors_per_cluster = 160    # samples drawn around each center
intra_cluster_noise = 0.2        # Euclidean norm of the per-sample perturbation

# Directory that receives the generated dataset file.
output_dir = '/root/main/ranking/data/synthetic_cluster'


def _unit_rows(x):
    """Return ``x`` with every row rescaled to unit Euclidean norm."""
    return x / np.linalg.norm(x, axis=-1, keepdims=True)


def generate_clustered_vectors(n_clusters, dim, per_cluster, noise, seed=None):
    """Sample unit vectors grouped around random unit-norm cluster centers.

    Each center is a standard-normal draw scaled to unit norm.  Every sample
    is its center plus a random direction of norm ``noise``, and the sum is
    scaled back to unit norm.  No mean-centering is applied.

    Args:
        n_clusters: Number of cluster centers.
        dim: Dimensionality of each vector.
        per_cluster: Number of samples generated per center.
        noise: Norm of the perturbation added to a center before the final
            renormalization.
        seed: Optional seed for ``np.random.default_rng``; ``None`` draws
            fresh entropy, so repeated calls give different data.

    Returns:
        Array of shape ``(n_clusters * per_cluster, dim)`` with unit-norm
        rows.  Rows ``[i * per_cluster, (i + 1) * per_cluster)`` belong to
        cluster ``i``.
    """
    rng = np.random.default_rng(seed)

    centers = _unit_rows(rng.standard_normal((n_clusters, dim)))
    # repeat along axis 0 keeps all samples of one cluster contiguous.
    samples = centers.repeat(per_cluster, 0)

    # Normalizing the noise first gives every sample a perturbation of
    # exactly the same magnitude; only its direction is random.
    directions = _unit_rows(rng.standard_normal(samples.shape))
    return _unit_rows(samples + noise * directions)


def save_vectors(vectors, out_dir, file_name):
    """Write ``vectors`` as text to ``out_dir/file_name`` and return the path.

    ``out_dir`` is created (including parents) when it does not exist yet,
    because ``np.savetxt`` does not create missing directories itself.
    """
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, file_name)
    np.savetxt(out_path, vectors)
    return out_path


def main():
    """Generate the dataset described by the module constants and save it."""
    vecs = generate_clustered_vectors(
        num_clusters, d, num_vectors_per_cluster, intra_cluster_noise
    )
    print(vecs.shape)
    save_vectors(
        vecs,
        output_dir,
        f"{num_clusters}_{d}_{num_vectors_per_cluster}.txt",
    )


if __name__ == "__main__":
    main()