import random
from pathlib import Path

import numpy as np
import pandas as pd

import cfg

def create() -> None:
    """Build the metadata table and embedding matrix for the 'letter' data.

    Reads the header-less CSV at ``cfg.path_letter``. The first column is
    taken as the raw class label and every remaining column as an attribute
    (named ``attr_1`` ... ``attr_k``).

    Two files are written to ``<cfg.path_data>/letter`` (the directory is
    created if it does not exist yet):

    * ``metadata.csv`` -- one-hot encoded label columns plus a ``subset``
      column holding either ``cfg.tag_evaluate`` or ``cfg.tag_unlabelled``.
    * ``data_embedding.npy`` -- the attribute columns as a NumPy array, in
      the same row order as ``metadata.csv``.

    The random generators are seeded with 0, so repeated runs on the same
    input produce the same subset assignment.
    """
    # Seed both generators so the random subset assignment is reproducible.
    random.seed(0)
    np.random.seed(0)

    # Load the raw table; the file has no header row, so name the columns here.
    df = pd.read_csv(cfg.path_letter, header=None)
    attr_cols = [f'attr_{i}' for i in range(1, df.shape[1])]
    df.columns = ['raw_label'] + attr_cols

    # One-hot encode the labels. get_dummies joins prefix and value with its
    # own '_' separator, which is why the last character of cfg.label_prefix
    # is dropped -- presumably a trailing underscore; verify against cfg.
    df_labels = pd.get_dummies(df['raw_label'], prefix=cfg.label_prefix[:-1]).astype(int)

    # The attribute columns are stored as-is as the embedding matrix.
    embeddings = df[attr_cols].to_numpy()

    # Each row is independently tagged for evaluation with this probability;
    # all other rows are tagged as unlabelled.
    eval_fraction = 0.2
    is_eval = np.random.rand(len(df_labels)) < eval_fraction
    df_labels['subset'] = np.where(is_eval, cfg.tag_evaluate, cfg.tag_unlabelled)

    # Make sure the target directory exists; otherwise both writes below
    # fail on a fresh checkout.
    out_dir = Path(cfg.path_data, 'letter')
    out_dir.mkdir(parents=True, exist_ok=True)

    # Save metadata and embeddings side by side.
    df_labels.to_csv(out_dir / 'metadata.csv', index=False)
    np.save(out_dir / 'data_embedding.npy', embeddings)


# Build the dataset files only when this file is run as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    create()