"""Concatenate Voronoi features from multiple data files generated by slurm job array
into single file.
"""

# %%
import os
from glob import glob

import pandas as pd
from pymatviz.enums import Key
from tqdm import tqdm

__author__ = "Janosh Riebesell"
__date__ = "2022-08-16"


# %%
# Directory containing this script; the glob below looks for the per-job feature
# files in a dated subdirectory relative to it.
module_dir = os.path.dirname(__file__)
date = "2022-11-25"
data = "mp"
glob_pattern = f"{date}-features-{data}/voronoi-features-{data}-*.csv.bz2"

# Collect matching files and sort them in place for a deterministic order.
file_paths = glob(f"{module_dir}/{glob_pattern}")
file_paths.sort()
print(f"Found {len(file_paths):,} files for {glob_pattern = }")

# Loaded dataframes keyed by file path.
dfs: dict[str, pd.DataFrame] = {}


# %%
# Read each feature file, indexed by material ID. Paths already present in dfs
# are not read again.
for file_path in tqdm(file_paths):
    if file_path not in dfs:
        dfs[file_path] = pd.read_csv(file_path).set_index(Key.mat_id)

# Stack the per-file dataframes row-wise, then round values to 4 decimals.
df_features = pd.concat(dfs.values())
df_features = df_features.round(4)

# Bar chart of how many columns share each per-column NaN count.
ax = (
    df_features.isna()
    .sum()
    .value_counts()
    .T.plot.bar()
)
ax.set(
    xlabel="# NaNs",
    ylabel="# columns",
    title="NaNs per column",
)


# %%
# Write the combined features to "<features dir>.csv.bz2", i.e. a file named after
# the directory holding the per-job files and located next to it.
# os.path.dirname is used so the parent directory is found correctly regardless
# of which path separator the OS (and hence glob) puts before the file name.
out_path = os.path.dirname(file_paths[0])
df_features.to_csv(f"{out_path}.csv.bz2")
