import math

import TraceStatistics
import alg
import alg2
import numpy as np
import Benchmark as bmk


# Experiment driver: for every K in the sweep, run alg.OLSTrace and the
# Benchmark baselines (Greedy, Random, Adaptive, OLSMinus, MAB, GOK, MPC) on
# the same oracles `Traces` times, then print the per-algorithm average
# accumulated reward together with the practical and theoretical "CR"
# (presumably "competitive ratio" -- confirm against the paper/alg module).

# NOTE(review): `scalar` is never read in the visible script; kept only so the
# module-level name still exists.
scalar = 19.83133673667908
T = 400
L = 3
granularity = 0.1
Traces = 5
Rounds = 1

for K in [200, 250, 300]:
    # Queried once per K, exactly as before: the helper is opaque from here
    # and may have side effects, so it is deliberately not hoisted.
    KList, minV, maxV = TraceStatistics.retureKList(T)

    # Feasibility guard: every level must satisfy KList[l] / T < 1.
    # An explicit raise is used instead of a bare `assert` so the guard still
    # fires when Python runs with -O (which strips assert statements).
    for l in range(L):
        levelLoad = KList[l] / T
        if levelLoad >= 1:
            print("Average Consumption ", levelLoad, " of level ", l, " is greater than 1.")
            raise AssertionError(
                f"average consumption {levelLoad} of level {l} must be below 1"
            )

    for j in range(Rounds):
        print("\n")
        print("K is", K)
        # NOTE(review): re-created every round, so the list printed at the end
        # only ever holds the current round's ratio -- confirm that is intended.
        praticalCRs = []

        consumptionArray = alg2.generateConsumptionArrayTrace()
        valueOracle, donothingValueOracle, consumptionOracle = alg2.generateOracle(T)
        gamma = (1 - math.pow(K, -0.5)) / L
        print("T: ", T, "; L: ",L, "; level 1 value range: [", minV[0], ",", maxV[0], "]")
        for l in range(L):
            print("Level" , l+1, " value range:[", minV[l], ",", maxV[l], "]")
        print("KList: ", KList)

        # Keyword arguments shared by every algorithm call below.
        sharedArgs = dict(
            T=T,
            K=K,
            consumptionOracle=consumptionOracle,
            valueArray=valueOracle,
            doNothingValue=donothingValueOracle,
        )

        accumulatedReward = 0
        averageRunTime = 0
        accumulatedRewardGreedy = 0
        accumulatedRewardRandom = 0
        accumulatedRewardAdaptive = 0
        accumulatedRewardWeakOLS = 0
        accumulatedRewardMAB = 0
        accumulatedRewardGOK = 0
        accumulatedRewardMPC = 0

        # The call order inside one trace is kept as-is in case the algorithms
        # share state (e.g. a random generator) -- not verifiable from here.
        for i in range(Traces):
            traceReward, traceRunTime = alg.OLSTrace(
                L=L,
                gamma=gamma,
                consumptionArray=consumptionArray,
                granularity=granularity,
                **sharedArgs,
            )
            accumulatedReward += traceReward
            averageRunTime += traceRunTime

            # Only the OLS run time is reported; the baselines' run times are
            # unpacked (each call must still return a pair) and discarded.
            traceReward, _ = bmk.Greedy(**sharedArgs)
            accumulatedRewardGreedy += traceReward

            traceReward, _ = bmk.Random(L=L, **sharedArgs)
            accumulatedRewardRandom += traceReward

            traceReward, _ = bmk.Adaptive(L=L, KList=KList, **sharedArgs)
            accumulatedRewardAdaptive += traceReward

            traceReward, _ = bmk.OLSMinus(
                gamma=gamma,
                consumptionArray=consumptionArray,
                granularity=granularity,
                **sharedArgs,
            )
            accumulatedRewardWeakOLS += traceReward

            traceReward, _ = bmk.MAB(L=L, **sharedArgs)
            accumulatedRewardMAB += traceReward

            traceReward, _ = bmk.GOK(L=L, **sharedArgs)
            accumulatedRewardGOK += traceReward

            # The arrays are rebuilt for every call, as before, so a callee
            # that modified them in place could not leak into the next trace.
            traceReward, _ = bmk.MPC(
                L=L,
                maxValue=np.array([0.960873635205598,0.9876375534832225,0.98775068720183]),
                avgConsumption=np.array([6.059999999999999,10.654,18.446]),
                **sharedArgs,
            )
            accumulatedRewardMPC += traceReward

        # Average every accumulator over the number of traces.
        expectedReward = accumulatedReward / Traces
        expectedRewardGreedy = accumulatedRewardGreedy / Traces
        expectedRewardRandom = accumulatedRewardRandom / Traces
        expectedRewardAdaptive = accumulatedRewardAdaptive / Traces
        expectedRewardWeakOLS = accumulatedRewardWeakOLS / Traces
        expectedRewardMAB = accumulatedRewardMAB / Traces
        expectedRewardGOK = accumulatedRewardGOK / Traces
        expectedRewardMPC = accumulatedRewardMPC / Traces

        averageRunTime = averageRunTime / Traces
        # The *1000 matches the "(ms)" label, i.e. the value is assumed to be
        # in seconds -- TODO confirm against alg.OLSTrace.
        print("averageRunTime(per task)(ms): ", averageRunTime*1000)

        print("accumulatedReward: ", expectedReward)
        print("accumulatedReward Greedy: ", expectedRewardGreedy)
        print("accumulatedReward Random: ", expectedRewardRandom)
        print("accumulatedReward Adaptive: ", expectedRewardAdaptive)
        print("accumulatedReward OLS-: ", expectedRewardWeakOLS)
        print("accumulatedReward MAB: ", expectedRewardMAB)
        print("accumulatedReward GOK: ", expectedRewardGOK)
        print("accumulatedReward MPC: ", expectedRewardMPC)

        # Practical ratio: averaged OLS reward divided by the sum of the value
        # oracle; the theoretical ratio is gamma itself.
        totalOracleValue = np.sum(valueOracle)
        practicalCR = expectedReward / totalOracleValue
        print("practical CR: ", practicalCR)
        praticalCRs.append(practicalCR)
        theoreticalCR = gamma
        print("theoretical CR: ", theoreticalCR)
        print("theoretical accumulated reward lowerbound: ", totalOracleValue*theoreticalCR)
        print("practical CR: ", praticalCRs)