import numpy as np
import json
from sklearn.decomposition import PCA

# --- Experiment configuration ---
num_archetypes = 5        # number of archetype vectors to generate
original_dim = 50         # dimensionality of the generated points
proj_dims = [10, 2]       # target dimensionalities for the PCA projections
seed = 42                 # seed for NumPy's global random number generator
pca_support_points = 100  # total rows (archetypes + extras) fed to the PCA

# --- Synthetic data ---
# The archetypes are drawn first and the extra points second; the draw order
# determines which random values each array receives, so keep it as is.
np.random.seed(seed)
archetypes = np.random.randn(num_archetypes, original_dim)
num_extra_points = pca_support_points - num_archetypes
extra_points = np.random.randn(num_extra_points, original_dim)

# Archetypes occupy the leading rows of the PCA input, extras follow.
pca_input = np.concatenate((archetypes, extra_points), axis=0)

print(f"Generated {num_archetypes} archetypes with original dimension {original_dim}.")
print(archetypes)

# Project using PCA
# For every target dimensionality a separate PCA is fitted on all support
# points; only the projected archetype rows (the leading `num_archetypes`
# rows of `pca_input`) are kept.
projections = {}   # d -> projected archetypes as nested lists
pca_matrices = {}  # d -> PCA components as nested lists
for d in proj_dims:
    # Pass the seed explicitly: with the default solver selection,
    # scikit-learn may switch to a randomized SVD for larger inputs, and
    # the result would then no longer be reproducible from `seed` alone.
    pca = PCA(n_components=d, random_state=seed)
    projected_all = pca.fit_transform(pca_input)
    projections[d] = projected_all[:num_archetypes].tolist()  # Only keep the archetypes

    # Store the principal axes (pca.components_) only.
    # NOTE: PCA centers the data with pca.mean_ before projecting; the mean
    # is not stored here, so the matrix alone does not reproduce the
    # projections above on uncentered data.
    pca_matrices[d] = pca.components_.tolist()

    print(f"PCA for {d}D: explained variance ratio = {pca.explained_variance_ratio_}")

# Write one JSON file per projection dimensionality, named after that
# dimensionality and placed in the current working directory.
for d, components in pca_matrices.items():
    filename = f"pca_matrix_{d}d.json"
    serialized = json.dumps(components, indent=4)
    with open(filename, "w") as out_file:
        out_file.write(serialized)
    print(f"PCA matrix for {d}D saved to '{filename}'")

# Print one JSON config per projection dimensionality, in the order given by
# `proj_dims`. Key order is kept stable so the printed JSON stays the same.
for d in proj_dims:
    config = dict(
        archetypes=projections[d],
        num_samples=5000,
        dimensions=d,
        seed=seed,
    )
    config_json = json.dumps(config, indent=4)
    print(config_json)
