import os

from src.config import MERGED_DATASET_NAME, OUTPUTS_DIR, SMILES_DATASET_FILENAME
from src.merging import create_smiles_dataset, merge_datasets
from src.pipelines.chemspace_pipeline import ChemSpacePipeline
from src.pipelines.coconut_pipeline import COCONUTPipeline
from src.pipelines.mcule_pipeline import MculePipeline
from src.pipelines.pubchem_pipeline import PubChemPipeline
from src.pipelines.supernatural_3_pipeline import SuperNatural3Pipeline
from src.pipelines.unichem_pipeline import UniChemPipeline

def main() -> None:
    """Run every source pipeline, then merge the results and build the SMILES dataset.

    Steps, in order:
      1. Instantiate one pipeline object per data source.
      2. Ensure ``OUTPUTS_DIR`` exists.
      3. Call ``process()`` on each pipeline sequentially.
      4. Call ``merge_datasets`` with the pipelines and the merged-dataset path.
      5. Call ``create_smiles_dataset`` with the merged-dataset path and the
         SMILES-dataset path.
    """
    # The list order is the order in which pipelines are processed and the
    # order in which they are handed to merge_datasets.
    pipelines = [
        UniChemPipeline(),
        PubChemPipeline(),
        MculePipeline(),
        ChemSpacePipeline(),
        COCONUTPipeline(),
        SuperNatural3Pipeline(),
    ]

    # makedirs(exist_ok=True) avoids the check-then-create race of
    # `if not exists: mkdir` and also creates any missing parent directories.
    os.makedirs(OUTPUTS_DIR, exist_ok=True)

    for pipeline in pipelines:
        print(f"Starting processing {pipeline.source_name} pipeline")
        pipeline.process()

    # os.path.join already returns a str for str/path-like inputs, so no
    # extra str() conversion is needed.
    merged_dataset_file_path = os.path.join(OUTPUTS_DIR, MERGED_DATASET_NAME)
    # NOTE(review): presumably writes the merged dataset to this path — confirm
    # against src.merging.merge_datasets.
    merge_datasets(pipelines, merged_dataset_file_path)

    smiles_dataset_file_path = os.path.join(OUTPUTS_DIR, SMILES_DATASET_FILENAME)
    # NOTE(review): presumably reads the merged file and writes the SMILES
    # dataset — confirm against src.merging.create_smiles_dataset.
    create_smiles_dataset(merged_dataset_file_path, smiles_dataset_file_path)


if __name__ == "__main__":
    main()
