import os
import json
import time
import argparse
import subprocess

from multiprocessing.pool import ThreadPool

from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical
from smac import Scenario, AlgorithmConfigurationFacade


# Command-line interface.
# The three input files and the output directory are mandatory: each of them is
# opened (or joined into a path) unconditionally below, so a missing value
# would otherwise surface as an opaque TypeError instead of a usage message.
parser = argparse.ArgumentParser(description='training PAP.')
parser.add_argument('--config', type=str, required=True, help='algorithm configuration')
parser.add_argument('--train', type=str, required=True, help='train data')
parser.add_argument('--history', type=str, required=True, help='PAP history')
parser.add_argument('--output', type=str, required=True, help='output directory')
parser.add_argument('--processes', type=int, default=1, help='number of processes used in training')
parser.add_argument('--seed', type=int, default=0, help='random seed')
args = parser.parse_args()

# Algorithm description; main() reads its 'algorithm' and 'configs' entries.
with open(args.config, 'r') as f:
    json_config = json.load(f)
# Training datasets; the evaluation functions iterate over its 'train' list
# and read 'name', 'data' and 'truth' from every entry.
with open(args.train, 'r') as f:
    json_train = json.load(f)
# Previous results; the evaluation functions read json_history['best'][name].
with open(args.history, 'r') as f:
    json_history = json.load(f)

# Everything this script writes goes under OUTPUT_DIR; per-run estimates,
# metrics and logs of the algorithm runners go under CACHE_DIR.
OUTPUT_DIR = args.output
CACHE_DIR = os.path.join(OUTPUT_DIR, 'cache')


def print_with_time(content):
    """Print the current UTC timestamp on one line, then ``content``.

    ``content`` is printed together with a ``'\\n'`` argument, so it is
    followed by a space and an empty line.
    """
    now_utc = time.gmtime()
    stamp = time.strftime("%Y-%m-%d %H:%M:%S UTC %z", now_utc)
    print(stamp)
    # The extra '\n' argument produces "<content> \n" plus print's own newline.
    print(content, '\n')


def open_and_wait_timeout(cmd, logfile, timeout=None):
    """Run ``cmd`` through the shell, waiting at most ``timeout`` seconds.

    stdout and stderr of the command are both redirected to ``logfile``, which
    is opened for writing (and therefore truncated) first. Timestamped
    messages are printed when the command is launched and when it finishes or
    times out.

    Args:
        cmd: Shell command line to execute.
        logfile: Path of the file receiving the command's output.
        timeout: Seconds to wait before stopping the command; ``None`` waits
            until the command exits on its own.
    """
    # Seconds granted to the process to exit after terminate() before kill().
    grace_period = 10

    with open(logfile, 'w') as f:
        process = subprocess.Popen(cmd, shell=True, stdout=f, stderr=f)
        print_with_time('open cmd: {}'.format(cmd))
        try:
            process.wait(timeout=timeout)
            print_with_time('finished cmd: {}'.format(cmd))
        except subprocess.TimeoutExpired:
            process.terminate()
            # Wait for the process to actually exit so that it is reaped and
            # no longer writes to the log file once the file is closed.
            # NOTE(review): with shell=True the signal is sent to the shell;
            # processes started by the shell may need separate handling.
            try:
                process.wait(timeout=grace_period)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
            print_with_time('timeout cmd: {}'.format(cmd))


def open_and_wait(cmd, logfile):
    """Run ``cmd`` through the shell and block until it exits.

    Both stdout and stderr of the command are written to ``logfile``, which is
    opened for writing (and therefore truncated) first. A timestamped message
    is printed after the launch and again after the command has exited.
    """
    with open(logfile, 'w') as log_handle:
        child = subprocess.Popen(
            cmd,
            shell=True,
            stdout=log_handle,
            stderr=log_handle,
        )
        launched_msg = 'open cmd: {}'.format(cmd)
        print_with_time(launched_msg)

        child.wait()

        done_msg = 'finished cmd: {}'.format(cmd)
        print_with_time(done_msg)


def fges_from_cfg(cfg, seed=0):
    """Score one FGES configuration over every training dataset.

    For each entry of ``json_train['train']`` a ``fges_params_runner.py``
    command is launched through a thread pool of ``args.processes`` workers.
    After all commands have returned, the metrics file each runner was asked
    to write is read back.

    Each dataset contributes ``max(history_best, f1_adj + f1_arrow)``, where
    ``history_best`` is ``json_history['best'][name]``. If the metrics file is
    missing or cannot be parsed, ``history_best`` alone is used and the
    problem is logged, so a single failed run does not abort the optimisation.

    Args:
        cfg: Mapping providing the ``penalty``, ``faithful`` and ``degree``
            values of the configuration under evaluation.
        seed: Accepted as part of the target-function signature; unused here.

    Returns:
        The negated sum of the per-dataset contributions.
    """
    print_with_time('evaluate fges cfg - {}'.format(cfg))

    # The tag depends only on the configuration, so build it once rather than
    # once per dataset.
    temp = 'fges_{}-{}_{}-{}_{}-{}'.format(
        'penalty', cfg['penalty'],
        'faithful', cfg['faithful'],
        'degree', cfg['degree']
    )
    cpu_limit = 7200  # value forwarded to the runner's --cpulimit option

    def report_failure(err):
        # Without an error callback an exception raised inside a pool worker
        # is dropped, because the AsyncResult is never inspected.
        print_with_time('fges runner failed - {!r}'.format(err))

    metrics_files = []  # (dataset name, metrics path) in submission order
    tp = ThreadPool(args.processes)
    for d in json_train['train']:
        stem = '{}_{}'.format(d['name'], temp)
        est_file = os.path.join(CACHE_DIR, stem + '_est.json')
        metrics_file = os.path.join(CACHE_DIR, stem + '_metrics.json')
        log_file = os.path.join(CACHE_DIR, stem + '_logger.log')
        metrics_files.append((d['name'], metrics_file))

        cmd = ' '.join([
            'python ./algorithms/fges_params_runner.py',
            '--data {}'.format(d['data']),
            '--truth {}'.format(d['truth']),
            '--penalty {}'.format(cfg['penalty']),
            '--faithful {}'.format(cfg['faithful']),
            '--degree {}'.format(cfg['degree']),
            '--est {}'.format(est_file),
            '--metrics {}'.format(metrics_file),
            '--cpulimit {}'.format(cpu_limit),
        ])
        tp.apply_async(open_and_wait, (cmd, log_file),
                       error_callback=report_failure)
    tp.close()
    tp.join()

    score = 0
    for name, metrics_file in metrics_files:
        history_best = json_history['best'][name]
        if not os.path.exists(metrics_file):
            print_with_time('file not exists - {}'.format(metrics_file))
            score += history_best
            continue

        try:
            with open(metrics_file) as f:
                metrics = json.load(f)
            achieved = metrics['f1_adj'] + metrics['f1_arrow']
        except (ValueError, KeyError, TypeError) as err:
            # Invalid JSON (json.JSONDecodeError is a ValueError) or missing /
            # non-numeric fields: treat like a run that produced no metrics.
            print_with_time('unreadable metrics - {}: {!r}'.format(metrics_file, err))
            score += history_best
            continue
        score += max(history_best, achieved)

    print_with_time('evaluate fges cfg fin - {}'.format(cfg))
    return -score


def pcstable_from_cfg(cfg, seed=0):
    """Score one PC-stable configuration over every training dataset.

    For each entry of ``json_train['train']`` a ``pcstable_params_runner.py``
    command is launched through a thread pool of ``args.processes`` workers.
    After all commands have returned, the metrics file each runner was asked
    to write is read back.

    Each dataset contributes ``max(history_best, f1_adj + f1_arrow)``, where
    ``history_best`` is ``json_history['best'][name]``. If the metrics file is
    missing or cannot be parsed, ``history_best`` alone is used and the
    problem is logged, so a single failed run does not abort the optimisation.

    Args:
        cfg: Mapping providing the ``alpha`` and ``depth`` values of the
            configuration under evaluation.
        seed: Accepted as part of the target-function signature; unused here.

    Returns:
        The negated sum of the per-dataset contributions.
    """
    print_with_time('evaluate pcstable cfg - {}'.format(cfg))

    # The tag depends only on the configuration, so build it once rather than
    # once per dataset.
    temp = 'pcstable_{}-{}_{}-{}'.format(
        'alpha', cfg['alpha'],
        'depth', cfg['depth']
    )
    cpu_limit = 7200  # value forwarded to the runner's --cpulimit option

    def report_failure(err):
        # Without an error callback an exception raised inside a pool worker
        # is dropped, because the AsyncResult is never inspected.
        print_with_time('pcstable runner failed - {!r}'.format(err))

    metrics_files = []  # (dataset name, metrics path) in submission order
    tp = ThreadPool(args.processes)
    for d in json_train['train']:
        stem = '{}_{}'.format(d['name'], temp)
        est_file = os.path.join(CACHE_DIR, stem + '_est.json')
        metrics_file = os.path.join(CACHE_DIR, stem + '_metrics.json')
        log_file = os.path.join(CACHE_DIR, stem + '_logger.log')
        metrics_files.append((d['name'], metrics_file))

        cmd = ' '.join([
            'python ./algorithms/pcstable_params_runner.py',
            '--data {}'.format(d['data']),
            '--truth {}'.format(d['truth']),
            '--alpha {}'.format(cfg['alpha']),
            '--depth {}'.format(cfg['depth']),
            '--est {}'.format(est_file),
            '--metrics {}'.format(metrics_file),
            '--cpulimit {}'.format(cpu_limit),
        ])
        tp.apply_async(open_and_wait, (cmd, log_file),
                       error_callback=report_failure)
    tp.close()
    tp.join()

    score = 0
    for name, metrics_file in metrics_files:
        history_best = json_history['best'][name]
        if not os.path.exists(metrics_file):
            print_with_time('file not exists - {}'.format(metrics_file))
            score += history_best
            continue

        try:
            with open(metrics_file) as f:
                metrics = json.load(f)
            achieved = metrics['f1_adj'] + metrics['f1_arrow']
        except (ValueError, KeyError, TypeError) as err:
            # Invalid JSON (json.JSONDecodeError is a ValueError) or missing /
            # non-numeric fields: treat like a run that produced no metrics.
            print_with_time('unreadable metrics - {}: {!r}'.format(metrics_file, err))
            score += history_best
            continue
        score += max(history_best, achieved)

    print_with_time('evaluate pcstable cfg fin - {}'.format(cfg))
    return -score

    
def main():
    """Tune the configured algorithm with SMAC and store the best configuration.

    Builds a ``ConfigurationSpace`` from ``json_config['configs']``, runs an
    ``AlgorithmConfigurationFacade`` on the evaluation function selected by
    ``json_config['algorithm']`` and writes the best configuration together
    with its cost to ``result.json`` inside ``OUTPUT_DIR``.

    Raises:
        ValueError: If ``json_config['algorithm']`` is not ``'fges'`` or
            ``'pcstable'``, or if a hyperparameter entry has a ``type`` other
            than ``'Integer'``, ``'Float'`` or ``'Categorical'``.
    """
    print_with_time('enter smac - {}'.format(args))

    # Resolve the target function first so that an unsupported algorithm is
    # rejected before any directory is created or any trial is started.
    target_functions = {
        'fges': fges_from_cfg,
        'pcstable': pcstable_from_cfg,
    }
    algorithm = json_config['algorithm']
    if algorithm not in target_functions:
        raise ValueError('unsupported algorithm {!r}; expected one of {}'.format(
            algorithm, sorted(target_functions)))
    target_function = target_functions[algorithm]

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.makedirs(CACHE_DIR, exist_ok=True)

    configspace = ConfigurationSpace()
    for config in json_config['configs']:
        if config['type'] == 'Integer':
            configspace.add_hyperparameter(Integer(config['name'], (config['lower'], config['upper']),
                                                   log=config['log']))
        elif config['type'] == 'Float':
            configspace.add_hyperparameter(Float(config['name'], (config['lower'], config['upper']),
                                                 log=config['log']))
        elif config['type'] == 'Categorical':
            configspace.add_hyperparameter(Categorical(config['name'], config['items']))
        else:
            # Dropping the entry silently would leave the evaluation functions
            # without a parameter they read from the sampled configuration.
            raise ValueError('unsupported hyperparameter type {!r} for {!r}'.format(
                config['type'], config['name']))

    scenario = Scenario(
        configspace=configspace,
        deterministic=True,
        output_directory=os.path.join(OUTPUT_DIR, 'smac_output'),
        n_trials=10,  # default is 100
        seed=args.seed
    )
    print_with_time(scenario.configspace.get_hyperparameters())

    smac = AlgorithmConfigurationFacade(scenario, target_function)
    best_found_config = smac.optimize()
    print_with_time('best found config: {}'.format(best_found_config))

    result = {
        'config': dict(best_found_config),
        'cost': smac.runhistory.get_cost(best_found_config)
    }

    with open(os.path.join(OUTPUT_DIR, 'result.json'), 'w') as f:
        json.dump(result, f, indent='\t')


# Start the optimisation only when this file is executed as a script.
if __name__ == '__main__':
    main()





