import random
from pathlib import Path

import numpy as np
import pandas as pd

import cfg


def create():
    """Build the scene dataset's embedding array and label metadata files.

    Reads the ';'-delimited CSV at ``cfg.path_data_scene`` and splits its
    columns by name:

    * columns whose name starts with ``'attr'`` become the embedding matrix,
      saved as ``data_embedding.npy``;
    * all remaining columns are renamed with ``cfg.label_prefix`` prepended
      and, together with a new ``'subset'`` column, saved as ``metadata.csv``.

    Each row's ``'subset'`` value is ``cfg.tag_evaluate`` when an independent
    uniform draw in [0, 1) is below 0.2, otherwise ``cfg.tag_unlabelled``.

    Both files are written to ``<cfg.path_data>/scene``; the directory is
    created if it does not exist yet.
    """
    # Fix the seeds so the subset assignment is reproducible between runs.
    # The stdlib generator is seeded as well, although nothing in this
    # function draws from it.
    random.seed(0)
    np.random.seed(0)

    # load the raw table
    df = pd.read_csv(cfg.path_data_scene, delimiter=';')

    # compute the column mask once; it drives both the embedding/label split
    is_attr = df.columns.str.startswith('attr')

    # create embeddings
    embeddings = df.loc[:, is_attr].values

    # create metadata; rename() returns a new frame, so adding a column
    # below does not write into a view of `df`
    df_label = df.loc[:, ~is_attr].rename(columns=lambda x: f'{cfg.label_prefix}{x}')
    draws = np.random.rand(len(df_label))
    df_label['subset'] = np.where(draws < 0.2, cfg.tag_evaluate, cfg.tag_unlabelled)

    # save data and metadata; make sure the target directory exists first,
    # otherwise the writers fail on a fresh checkout
    out_dir = Path(cfg.path_data, 'scene')
    out_dir.mkdir(parents=True, exist_ok=True)
    df_label.to_csv(out_dir / 'metadata.csv', index=False)
    np.save(out_dir / 'data_embedding.npy', embeddings)


# Run the dataset preparation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    create()
