from coba.learners import VowpalEpsilonLearner
from coba.environments import Environments
from coba.experiments  import Experiment, SimpleEnvironmentTask

from memory import EMT
from learners import StackedMemLearner
from evaluators import SlimOnlineOnPolicyEvaluation

# Exploration rate handed as the first argument to every learner built below.
epsilon = 0.1

# Keyword arguments forwarded verbatim to ``Experiment.config(**config)``.
# NOTE(review): presumably these control coba's multiprocessing (worker count,
# how work is chunked, chunks per child process) -- confirm against coba docs.
config = dict(processes=8, chunk_by='task', maxchunksperchild=2)

if __name__ == '__main__':

    def make_bounded_learner(bound):
        """Build a StackedMemLearner whose EMT memory uses the given ``bound``.

        All other EMT settings and the trailing positional arguments are the
        same for every memory learner in this experiment; their meaning is
        defined by ``EMT`` / ``StackedMemLearner``, which are not visible here.
        """
        memory = EMT(bound=bound, scorer=3, router=2, split=100, interactions=['xa'])
        return StackedMemLearner(epsilon, memory, "xxa", False, True)

    def shuffle_then_task(env):
        """Sort key: order environments by their shuffle param, then task id."""
        return (env.params['shuffle'], env.params['openml_task'])

    # The plain VW epsilon learner comes first, followed by one memory learner
    # per bound, in increasing bound order.
    learners = [VowpalEpsilonLearner(epsilon, features=["a", "xa", "xxa"])]
    learners.extend(make_bounded_learner(bound) for bound in (1000, 2000, 16000, 32000))

    description = "Full 50 replicate run for the ICLR 2023 paper including only datasets that have 32,000 examples or more."

    # Set this to a file path (for example "./results/ICLR-2023-bounded.log.gz")
    # to save results while the experiment runs; a saved run can be stopped
    # and resumed at a later date. With None no log path is given.
    log = None

    # Load the environments from the template and put them in a fixed order.
    template_environments = Environments.from_template("./experiments/bounded.json")
    environments = sorted(template_environments, key=shuffle_then_task)

    environment_task = SimpleEnvironmentTask()
    evaluation_task = SlimOnlineOnPolicyEvaluation()

    experiment = Experiment(
        environments,
        learners,
        description,
        environment_task=environment_task,
        evaluation_task=evaluation_task,
    ).config(**config)

    result = experiment.evaluate(log)

    # NOTE(review): filter_fin presumably keeps only finished environments
    # before plotting -- confirm against coba's Result API.
    finished = result.filter_fin()
    finished.plot_learners(y='reward')
