'''
Process the strychnine data from the Route Similarity paper: convert each
route tree JSON file into a list of retro reactions.
'''
import os
import json
import hydra
import pandas as pd

from setup_path import *
from multiguide.helpers import PROJECT_ROOT

def _route_to_retro_reactions(route):
    '''
    Flatten a route tree into a list of retro reaction strings.

    The nodes below the root are visited breadth-first (the root node itself
    is not inspected). For every node whose ``type`` is ``'reaction'``, the two
    sides of ``metadata['reaction_smiles']`` around ``'>>'`` are swapped and the
    result is appended to the output.

    Args:
        route: dict parsed from a ``*_tree.json`` file; nested nodes are
            reached through their ``'children'`` lists.

    Returns:
        List of side-swapped reaction strings in breadth-first order. Empty
        if the root has no ``'children'`` entry.
    '''
    # local import so this block needs no change to the file-level imports
    from collections import deque

    # copy into a deque: O(1) pops from the left, and the loaded tree is left unmodified
    queue = deque(route.get('children', []))
    retro_reactions = []
    while queue:
        node = queue.popleft()
        if node['type'] == 'reaction':
            sides = node['metadata']['reaction_smiles'].split('>>')
            # swap the two sides of the reaction string to get the retro direction
            retro_reactions.append(sides[1] + '>>' + sides[0])
        # applies to both reactions and mols
        if 'children' in node:
            queue.extend(node['children'])
    return retro_reactions


@hydra.main(config_path='../configs', config_name='config.yaml', version_base=None)
def process_strychnine_data(cfg):
    '''
    Convert every route tree of one compound into a list of retro reactions.

    Reads all ``*_tree.json`` files found in
    ``<PROJECT_ROOT>/data/route_similarity_data/Outputs/<compound_name>`` and
    writes a single JSON file
    ``<PROJECT_ROOT>/data/route_similarity_data/processed/<compound_name>.json``
    containing a list of ``{'route_name': ..., 'route_as_list': ...}`` entries,
    one per route file.

    Args:
        cfg: hydra config; only ``cfg.route_dataset.compound_name`` is read here.
    '''
    compound_name = cfg.route_dataset.compound_name
    data_dir = os.path.join(PROJECT_ROOT, 'data', 'route_similarity_data')
    compound_dir = os.path.join(data_dir, 'Outputs', compound_name)
    processed_dir = os.path.join(data_dir, 'processed')
    os.makedirs(processed_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so the output file is reproducible
    route_files = sorted(f for f in os.listdir(compound_dir) if f.endswith('_tree.json'))
    print(f'Found {len(route_files)} route files in {compound_dir}')

    # optional: do custom processing to save the original info for each route
    # in the same processed json file.
    all_processed_routes = []
    for route_file in route_files:
        route_path = os.path.join(compound_dir, route_file)
        route_name = route_file.split('_tree.json')[0]
        # context manager guarantees the file handle is closed after parsing
        with open(route_path, 'r', encoding='utf-8') as f:
            route = json.load(f)
        print(f'Loaded route from {route_path}')

        # turn the route into a list of retro reactions
        route_as_list = _route_to_retro_reactions(route)
        print(f'Route {route_name} has {len(route_as_list)} reactions')
        all_processed_routes.append({
            'route_name': route_name,
            'route_as_list': route_as_list
        })

    # save all routes of this compound to one json file
    processed_routes_path = os.path.join(processed_dir, f'{compound_name}.json')
    with open(processed_routes_path, 'w', encoding='utf-8') as f:
        json.dump(all_processed_routes, f, indent=4)
    print(f'Saved all processed routes to {processed_routes_path}')

# script entry point: called without arguments, the hydra.main decorator supplies cfg
if __name__ == "__main__":
    process_strychnine_data()