from typing import List, Tuple

import numpy as np
import pandas as pd
from tqdm import tqdm


def aggregate_samples(
        embeddings: np.ndarray,
        meta_df: pd.DataFrame,
        grouping: List,
) -> Tuple[np.ndarray, pd.DataFrame]:
    """Average L2-normalized embeddings within each metadata group.

    Rows of ``embeddings`` are matched to rows of ``meta_df`` by position.
    Every row is L2-normalized, rows sharing the same values in the
    ``grouping`` columns are averaged, and each group mean is L2-normalized
    again.

    Args:
        embeddings: 2-D array with one embedding per row.
        meta_df: Metadata with one row per embedding, in the same order.
        grouping: Column name(s) of ``meta_df`` passed to ``groupby``.

    Returns:
        A tuple ``(aggregate_embeddings, aggregate_meta_df)``. The array has
        one normalized mean embedding per group; the frame holds the first
        metadata row of each group (original column dtypes preserved) under a
        fresh ``RangeIndex``. Both follow the group order of
        ``meta_df.groupby(grouping).indices``.

    Note:
        A zero-norm row (or a zero-norm group mean) is divided by zero and
        therefore yields non-finite values, as no epsilon is applied.
    """
    # make sure embeddings normalized
    embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

    print(f'Generating {grouping} level embeddings...')
    # Positional row indices of every group, in groupby order.
    group_indices = list(meta_df.groupby(grouping).indices.values())

    if not group_indices:
        # Nothing to aggregate: keep the embedding width and the metadata
        # columns instead of failing on a 1-D empty array below.
        return embeddings[:0], meta_df.iloc[:0].reset_index(drop=True)

    # Lay the rows out group by group so that each group is one contiguous
    # segment, then sum every segment in a single vectorized pass.
    order = np.concatenate(group_indices)
    counts = np.array([len(indices) for indices in group_indices])
    starts = np.cumsum(counts) - counts
    sums = np.add.reduceat(embeddings[order], starts, axis=0)
    # Cast the counts so the division does not upcast e.g. float32 means.
    aggregate_embeddings = sums / counts[:, None].astype(sums.dtype)
    aggregate_embeddings = aggregate_embeddings / np.linalg.norm(
        aggregate_embeddings, axis=1, keepdims=True)

    # first entry of each group for metadata; a single positional selection
    # keeps the column dtypes and avoids growing a DataFrame row by row.
    aggregate_meta_df = meta_df.iloc[order[starts]].reset_index(drop=True)

    return aggregate_embeddings, aggregate_meta_df

