from nltk.tokenize import word_tokenize
import json
import subprocess
import time
from askbert_api import call_askbert
import os
import redis

def start_flask():
    """Spawn the askbert script in a child process and pause briefly.

    Runs ``askbert/askbert.py`` with the single argument ``'6001'``
    (presumably the port to listen on -- confirm against askbert.py) using a
    hard-coded conda interpreter path, then sleeps for three seconds.
    The ``Popen`` handle is discarded, so the child is neither waited on nor
    terminated by this function.
    """
    print('Starting flask')
    interpreter = '/home/mhmh/anaconda3/envs/pytorch/bin/python'
    command = [interpreter, 'askbert/askbert.py', '6001']
    subprocess.Popen(command)
    time.sleep(3)


def clean(s):
    """Return *s* with noise characters blanked, trimmed and lower-cased.

    Each newline, ``*``, ``-``, ``(``, ``)``, ``<`` and ``>`` is replaced by
    a single space; the result is then stripped of leading/trailing
    whitespace and converted to lower case.
    """
    # One translation table replaces every unwanted character in one pass.
    blank_out = str.maketrans(dict.fromkeys('\n*-()<>', ' '))
    return s.translate(blank_out).strip().lower()


def _stripped_names(value):
    """Return the whitespace-stripped names held by *value*, or None to skip.

    *value* may be a single string or an iterable of strings. ``None`` is
    returned when *value* is ``None`` or contains ``None``, signalling that
    the caller should ignore it.
    """
    if value is None:
        return None
    if isinstance(value, str):
        # A bare string is one name; never split it into characters.
        return [value.strip()]
    names = list(value)
    if None in names:
        return None
    return [name.strip() for name in names]


def generate2id(dat):
    """Extract entities for one game and write its entity-to-id table.

    Args:
        dat: A ``(game, idx)`` pair. ``game`` is the path of a JSON file of
            the form ``<dir>/<prefix>_<gamename>.<ext>`` containing a list of
            records with ``'loc_desc'``, ``'inv_desc'`` and ``'obs'`` keys.
            ``idx`` is the Redis database number used for this game.

    Side effects:
        * Flushes Redis database ``idx`` on ``localhost:6379`` and stores
          ``str(entities)`` under each cleaned context string.
        * Writes ``../data/<gamename>_entity2id.tsv`` with one
          ``name<TAB>id`` line per entity; whitespace inside a name is
          replaced by underscores.
        * Prints the record count, the game path and the mean time per
          successful askbert call.
    """
    game, idx = dat
    conn_openie = redis.Redis(host='localhost', port=6379, db=idx)
    conn_openie.flushdb()

    with open(game, 'r') as f:
        data = json.load(f)
    # 'qa_data/data_zork1.json' -> 'zork1'
    gamename = game.split('/')[1].split('.')[0].split('_')[1]

    # Use the game-specific attribute list when one exists and is non-empty;
    # otherwise fall back to the generic list below.
    context_attr = ""
    attr_file = 'attrs/' + gamename + '_attr.txt'
    if os.path.isfile(attr_file):
        with open(attr_file, 'r') as f:
            context_attr = str(f.read())
    if context_attr == "":
        context_attr = (
            "talkable, seen, lieable, enterable, nodwarf, indoors, visited, handed, lockable, surface, thing, "
            "water_room, unlock, lost, afflicted, is_treasure, converse, mentioned, male, npcworn, no_article, "
            "relevant, scored, queryable, town, pluggable, happy, is_followable, legible, multitude, burning, "
            "room, clothing, underneath, ward_area, little, intact, animate, bled_in, supporter, readable, "
            "openable, near, nonlocal, door, plugged, sittable, toolbit, vehicle, light, lens_searchable, "
            "open, familiar, is_scroll, aimable, takeable, static, unique, concealed, vowelstart, alcoholic, "
            "bodypart, general, is_spell, full, dry_land, pushable, known, proper, inside, clean, "
            "ambiguously_plural, container, edible, treasure, can_plug, weapon, is_arrow, insubstantial, "
            "pluralname, transparent, is_coin, air_room, scenery, on, is_spell_book, burnt, burnable, "
            "auto_searched, locked, switchable, absent, rockable, beenunlocked, progressing, severed, worn, "
            "windy, stone, random, neuter, legible, female, asleep, wiped"
        )

    ent = {'you', 'inv'}
    visited = set()
    t = time.time()
    calls = 0
    print(len(data))

    for d in data:
        context_redis = clean('[LOC] ' + d['loc_desc'] + ' [INV] ' + d['inv_desc'] + ' [OBS] ' + d['obs'])
        context = context_redis + ' [atr] ' + context_attr
        if context in visited:
            continue

        entities = call_askbert(context, threshold=0.3, attribute=True)
        if entities is None:
            # Not marked as visited, so a later duplicate context is retried.
            continue
        entities = entities['entities']
        conn_openie.set(str(context_redis), str(entities))

        for attr_value in entities['attributes'].values():
            names = _stripped_names(attr_value)
            if names is None:
                continue
            ent.update(names)

        for field in ['location', 'object_surr', 'objs_inv']:
            names = _stripped_names(entities[field])
            if names is None:
                print(entities)
                continue
            ent.update(names)

        visited.add(context)
        calls += 1

    with open('../data/' + gamename + '_entity2id.tsv', 'w') as f:
        # Sort so that ids are reproducible; plain set order varies per run.
        for i, e in enumerate(sorted(ent)):
            f.write('_'.join(e.split()) + '\t' + str(i) + '\n')

    # NOTE(review): Redis DUMP returns the serialized value stored at the
    # given *key*; it does not write an .rdb file. The result is discarded
    # here, so this call looks like a no-op -- confirm whether a server-side
    # save was intended.
    conn_openie.dump(gamename + '.rdb')
    print(game)
    if calls:
        print((time.time() - t) / calls)
    else:
        # Avoid ZeroDivisionError when no askbert call succeeded.
        print('no successful askbert calls')


if __name__ == '__main__':
    from glob import glob
    import multiprocessing

    # NOTE(review): call_askbert presumably needs the askbert service to be
    # reachable; start_flask() can be called here to launch it -- confirm.

    # Explicit game list; glob('qa_data/*') would pick up every file instead.
    games = ['qa_data/data_zork1.json', 'qa_data/data_library.json', 'qa_data/data_detective.json', 'qa_data/data_balances.json',
                'qa_data/data_pentari.json', 'qa_data/data_ztuu.json', 'qa_data/data_ludicorp.json', 'qa_data/data_deephome.json',
                'qa_data/data_temple.json']
    # Pair each game with its list position; generate2id uses that number as
    # the Redis database index.
    games = [(game, i) for i, game in enumerate(games)]

    # For debugging, the games can be processed serially instead:
    #     for g in games:
    #         generate2id(g)
    pool = multiprocessing.Pool(processes=6)
    try:
        pool.map(generate2id, games)
    finally:
        # Release the worker processes even if a game fails.
        pool.close()
        pool.join()
