from tqdm import tqdm 
import os
# from chemutils import vocabulary, smiles2word 
from chemutils import *
import numpy as np 


### clean smiles set
# Text file read below; every line of it becomes one entry of the set.
clean_smiles_database = "data/clean_zinc.txt"
with open(clean_smiles_database, 'r') as fin:
	# Materialise all lines (line endings still attached) before closing.
	lines = list(fin)
# Strip surrounding whitespace from each line and drop duplicates.
clean_smiles_set = set(map(str.strip, lines))


# Molecule strings for the available reference compounds.
levothyroxine = "C1=C(C=C(C(=C1I)OC2=CC(=C(C(=C2)I)O)I)I)CC(C(=O)O)N"
amoxicillin = "CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=C(C=C3)O)N)C(=O)O)C"
hydrochlorothiazide = "C1NC2=CC(=C(C=C2S(=O)(=O)N1)S(=O)(=O)N)Cl"

# Compound selected for this run: its label and its molecule string.
name = 'hydrochlorothiazide'
target_smiles = hydrochlorothiazide


def oracle(smiles):
	"""Score ``smiles`` against the module-level ``target_smiles``.

	Args:
		smiles: Molecule string passed as the first argument to
			``similarity``.

	Returns:
		The value of ``similarity(smiles, target_smiles)``, or ``0.0`` when
		that call raises an ordinary exception.
	"""
	try:
		return similarity(smiles, target_smiles)
	except Exception:
		# Best-effort scoring: an input that cannot be scored gets 0.0
		# instead of aborting the whole run. Only ``Exception`` is caught so
		# that KeyboardInterrupt / SystemExit still stop the script.
		return 0.0



### write results
# Build the output path from ``name`` so the file always matches the selected
# target; for name == 'hydrochlorothiazide' this is
# "data/zinc_hydrochlorothiazide_clean.txt".
output_file = "data/zinc_" + name + "_clean.txt"
with open(output_file, 'w') as fout:
	for smiles in tqdm(clean_smiles_set):
		label = oracle(smiles)
		# One record per line: the molecule string, a tab, then its score.
		fout.write(smiles + '\t' + str(label) + '\n')





