import argparse
import csv
import json
import pathlib

import ecole as ec
import numpy as np
import pandas as pd


def OnlineRL_Main(argsproblem, lp_path):
    """Evaluate the online-RL dual-task agent on every instance in a directory.

    For each instance file found in ``lp_path`` a fresh policy, observation
    function and environment are created, seeded with the instance's index
    in the sorted file list, and run until the environment reports ``done``.
    One row per instance is appended to a CSV file under ``results/dual/``.

    :param argsproblem: problem name, e.g. 'item_placement', "setcover",
        "cauctions", "indset", "facilities". 'item_placement' instances are
        read as ``*.mps.gz`` files, every other problem as ``*.lp`` files.
    :param lp_path: directory containing the instance files.
    :return: None; results are written to the CSV file.
    :raises AssertionError: if the installed Ecole version is not 0.7.3.
    """
    import sys

    argtask = 'dual'
    argsdebug = False

    # Check the Ecole version installed. Raised explicitly (rather than with
    # an ``assert`` statement) so the check is not stripped under ``python -O``.
    if ec.__version__ != "0.7.3":
        raise AssertionError("Wrong Ecole version.")

    print(f"Evaluating the {argtask} task agent on the {argsproblem} problem.")

    # Collect the instance files. The instance directory is always lp_path;
    # only the file pattern and the results file name depend on the problem.
    instances_path = pathlib.Path(lp_path)
    results_dir = pathlib.Path("results") / argtask
    if argsproblem == 'item_placement':
        results_file = results_dir / "1_item_placement_OnlineRL.csv"
        instance_pattern = '*.mps.gz'
    else:
        # Previously these names were never bound for other problems, which
        # crashed with a NameError; the results file is named after the problem.
        results_file = results_dir / f"{argsproblem}_OnlineRL.csv"
        instance_pattern = '*.lp'

    print(f"Processing instances from {instances_path.resolve()}")
    # glob() yields files in arbitrary, filesystem-dependent order. Sorting
    # keeps the instance -> seed assignment below reproducible across runs.
    instance_files = sorted(instances_path.glob(instance_pattern))

    print(f"Saving results to {results_file.resolve()}")
    results_file.parent.mkdir(parents=True, exist_ok=True)
    results_fieldnames = ['instance', 'seed', 'dual_bound', 'primal_bound',
                          'objective_offset', 'cumulated_reward', 'solvingtime', 'nnodes']
    # newline='' is what the csv module expects for its file objects; without
    # it, platforms that translate newlines emit blank rows.
    with open(results_file, mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=results_fieldnames)
        writer.writeheader()

    # Make the project packages importable from the current working directory.
    sys.path.insert(1, str(pathlib.Path.cwd()))

    # Set up the proper agent, environment and goal for the task. Only the
    # dual task is wired up in this function.
    if argtask != "dual":
        raise ValueError(f"Unsupported task: {argtask!r}")
    from agents.dual_OnlineRL import Policy, ObservationFunction
    from environments import BranchingOpen as Environment
    from rewards import TimeLimitDualIntegral as BoundIntegral

    memory_limit = 8796093022207  # maximum
    time_limit = 900  # passed to the environment as its time_limit

    # evaluation loop
    for seed, instance in enumerate(instance_files):
        # Path.name is separator-agnostic; keep only the part before the
        # first dot so "foo.mps.gz" is reported as "foo".
        tmp_instance_name = instance.name.split('.')[0]
        observation_function = ObservationFunction(problem=argsproblem)
        policy = Policy(problem=argsproblem)

        env = Environment(
            time_limit=time_limit,
            observation_function=observation_function,
            scip_params={'limits/memory': memory_limit},
            # negated so that the cumulated reward is a quantity to maximize
            reward_function=-BoundIntegral()
        )

        # seed both the agent and the environment (deterministic behavior)
        observation_function.seed(seed)
        policy.seed(seed)
        env.seed(seed)

        objective_offset = 0

        print()
        print(f"Instance {tmp_instance_name}")
        print(f"  seed: {seed}")
        print(f"  objective offset: {objective_offset}")

        cumulated_reward, info = _run_episode(env, policy, instance, debug=argsdebug)

        print(f"  cumulated reward (to be maximized): {cumulated_reward}")

        # save instance results
        with open(results_file, mode='a', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=results_fieldnames)
            writer.writerow({
                'instance': str(instance),
                'seed': seed,
                'dual_bound': info['dual_bound'],
                'primal_bound': info['primal_bound'],
                'objective_offset': objective_offset,
                'cumulated_reward': cumulated_reward,
                'solvingtime': info['solvingtime'],
                'nnodes': info['nnodes']
            })


def _run_episode(env, policy, instance, debug=False):
    """Run one episode on ``instance`` and return ``(cumulated_reward, info)``.

    The reward returned by ``env.reset`` is discarded; only rewards returned
    by ``env.step`` are accumulated. ``info`` is the last info object the
    environment returned (the one from ``reset`` if the episode was already
    done at reset).
    """
    observation, action_set, reward, done, info = env.reset(str(instance))

    if debug:
        print(f"  info: {info}")
        print(f"  reward: {reward}")
        print(f"  action_set: {action_set}")

    cumulated_reward = 0  # discard initial reward

    # loop over the environment
    while not done:
        action = policy(action_set, observation)

        if debug:
            print(f"  action: {action}")

        observation, action_set, reward, done, info = env.step(action)

        if debug:
            print(f"  info: {info}")
            print(f"  reward: {reward}")
            print(f"  action_set: {action_set}")

        cumulated_reward += reward

    return cumulated_reward, info