from cmath import log, pi, sqrt
import numpy as np
from random import sample, shuffle
import os, sys
import pickle
import matplotlib.pyplot as plt
from StandardEnv import UserManager, ArticleManager
import argparse
import shutil

currentdir = os.path.dirname(os.path.realpath(__file__))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)

from StandardEnv import UserManager, ArticleManager
from EpsilonGreedyMultiArmedBandit import EpsilonGreedyMultiArmedBandit
from UCB import UCB
from RUCB import RUCB
from TS import TS
from RTS import RTS
from LinUCB import LinUCB
from LinTS import LinTS
from CWOFUL import CWOFUL
from RobustLinTS import RobustLinTS

class simulateOnlineData:
    """Run bandit algorithms against simulated users while an attacker corrupts rewards.

    Each round every algorithm picks an article for every user, the
    environment produces a (noisy) linear reward, an attack method optionally
    subtracts a non-negative amount ``Alpha`` from that reward, and the
    algorithm is updated with the corrupted reward.
    """

    def __init__(self, context_dimension, testing_iterations, articles,
                 users, attackmethod, TargetId, budget, actionset, noise=lambda: 0, NoiseScale=0.0, poolArticleSize=None):
        """Store the experiment configuration.

        Args:
            context_dimension: Length of the user/article feature vectors.
            testing_iterations: Number of simulated rounds.
            articles: Objects exposing ``id`` and ``featureVector``.
            users: Objects exposing ``id`` and ``theta``.
            attackmethod: Name of the attack ("junsun", "worst", "oracle MAB",
                "oracle MAB indis", "Garcelon", "Garcelon indis" or "none").
            TargetId: ``id`` of the article the attacker promotes.
            budget: Attacks stop once the accumulated corruption reaches it.
            actionset: "basis_vector" (MAB attacks, rewards clipped to [0, 1])
                or "random" (contextual attacks).
            noise: Zero-argument callable returning the reward noise of a round.
            NoiseScale: Noise standard deviation used by ``beta`` and by the
                "Garcelon" attacks.
            poolArticleSize: Pool size; defaults to ``len(articles)``.
        """
        self.context_dimension = context_dimension
        self.testing_iterations = testing_iterations
        self.articles = articles
        self.users = users
        self.attackmethod = attackmethod
        self.TargetId = TargetId
        self.budget = budget
        self.actionset = actionset
        self.noise = noise
        self.NoiseScale = NoiseScale

        if poolArticleSize is None:
            self.poolArticleSize = len(self.articles)
        else:
            self.poolArticleSize = poolArticleSize

        # Cumulative regret is recorded every `batchSize` iterations.
        self.batchSize = 1

    def beta(self, N):
        """Return NoiseScale*sqrt((2/N)*log(pi^2*d*N^2/(3*delta))) with delta=0.05.

        ``N`` must be non-zero; the callers in this class check that first.
        """
        delta = 0.05
        return self.NoiseScale*np.sqrt((2/float(N))*np.log(np.pi*np.pi*self.context_dimension*N*N/float(3*delta)))

    def getTheta(self):
        """Return a (context_dimension, n_users) matrix whose column i is users[i].theta."""
        Theta = np.zeros(shape=(self.context_dimension, len(self.users)))
        for i, user in enumerate(self.users):
            Theta.T[i] = user.theta
        return Theta

    def getOptimalThetaId(self, user):
        """Return the first index of the largest strictly positive entry of ``user.theta``.

        Only the first ``context_dimension`` entries are inspected; 0 is
        returned when none of them is positive.
        """
        optimal = 0
        maxtheta = 0
        # Use the instance's dimension rather than a module-level global so the
        # class also works when imported from another module.
        for j in range(self.context_dimension):
            if user.theta[j] > maxtheta:
                optimal = j
                maxtheta = user.theta[j]
        return optimal

    def getReward(self, user, pickedArticle):
        """Return the noiseless linear reward <user.theta, pickedArticle.featureVector>."""
        return np.dot(user.theta, pickedArticle.featureVector)

    def GetOptimalReward(self, user, articlePool):
        """Return ``(best_reward, best_article)`` over ``articlePool``.

        Ties keep the earliest article; an empty pool yields ``(-inf, None)``.
        """
        maxReward = float('-inf')
        maxx = None
        for x in articlePool:
            reward = self.getReward(user, x)
            if reward > maxReward:
                maxReward = reward
                maxx = x
        return maxReward, maxx

    def getL2Diff(self, x, y):
        """Return the L2 norm of ``x - y``."""
        return np.linalg.norm(x-y)

    def regulateArticlePool(self):
        """Expose every article as the current pool (no sub-sampling)."""
        self.articlePool = self.articles

    @staticmethod
    def _pull_mean_to(Reward, MeanEstimateDic, PlayedNumberDic, level):
        """Corruption that drags the picked arm's running mean down to ``level``.

        Computes max(0, Reward + n*mean - (n+1)*level), where n and mean are
        the picked arm's play count and mean estimate before this round.
        """
        played = PlayedNumberDic['pickedarticleid']
        mean = MeanEstimateDic['pickedarticleid']
        return max(0, Reward + played*mean - (played+1)*level)

    def AttackMethods_MAB(self, algorithmname, Reward, iter_, PickedArticleID, MeanEstimateDic, PlayedNumberDic, attacksum, optimalthetaId, targetreward):
        """Return the corruption ``Alpha`` (>= 0) for one round in the MAB setting.

        Args:
            algorithmname: Name of the attacked algorithm; "junsun" only acts
                on "UCB", "TS", "RTS", "RUCB" and "epsilon".
            Reward: Uncorrupted reward of the picked article.
            iter_: Current iteration index.
            PickedArticleID: ``id`` of the picked article.
            MeanEstimateDic: Mean estimates keyed by 'pickedarticleid' / 'targetid'.
            PlayedNumberDic: Play counts keyed by 'pickedarticleid' / 'targetid'.
            attacksum: Corruption spent so far against this algorithm.
            optimalthetaId: Index of the best arm (used by "worst").
            targetreward: Reward the target article would have produced.

        Returns:
            The amount to subtract from ``Reward``; 0 when no attack applies.
        """
        Delta_Zero = 0.1
        epsilon_zero = 0.5
        Alpha = 0

        if self.attackmethod == "junsun":
            ucb_like = algorithmname in ("UCB", "TS", "RTS", "RUCB")
            if ucb_like or algorithmname == "epsilon":
                # No attack on the target arm, during the first
                # poolArticleSize rounds, or before the target was ever played.
                skip = (PickedArticleID == self.TargetId
                        or iter_ < self.poolArticleSize
                        or PlayedNumberDic['targetid'] == 0)
                if not skip and attacksum < self.budget:
                    level = MeanEstimateDic['targetid'] - 2*self.beta(PlayedNumberDic['targetid'])
                    if ucb_like:
                        # UCB/TS-style learners get an extra Delta_Zero margin.
                        level = level - Delta_Zero
                    Alpha = self._pull_mean_to(Reward, MeanEstimateDic, PlayedNumberDic, level)

        if self.attackmethod == "worst":
            if PickedArticleID == optimalthetaId and attacksum < self.budget:
                Alpha = self._pull_mean_to(Reward, MeanEstimateDic, PlayedNumberDic, -1)
            else:
                Alpha = 0

        if self.attackmethod == "oracle MAB":
            if PickedArticleID != self.TargetId and attacksum < self.budget:
                Alpha = max(0, Reward - targetreward + epsilon_zero)
            else:
                Alpha = 0

        return Alpha

    def AttackMethods_ContextualBandit(self, reward, targetreward, PickedArticleID, attacksum):
        """Return the corruption ``Alpha`` (>= 0) for one round in the contextual setting.

        The "indis" variants attack every pick (including the target) while
        budget remains; the plain variants spare the target article.
        """
        Alpha = 0
        epsilon_zero = 1

        if self.attackmethod == "oracle MAB":
            if PickedArticleID != self.TargetId and attacksum < self.budget:
                Alpha = max(0, reward - targetreward + epsilon_zero)
            else:
                Alpha = 0

        if self.attackmethod == "oracle MAB indis" and attacksum < self.budget:
            Alpha = max(0, reward - targetreward + epsilon_zero)

        if self.attackmethod == "Garcelon":
            if PickedArticleID != self.TargetId and attacksum < self.budget:
                # Replace the reward by a draw centred at -1.
                Alpha = max(0, reward - np.random.normal(loc=-1, scale=self.NoiseScale))
            else:
                Alpha = 0

        if self.attackmethod == "Garcelon indis" and attacksum < self.budget:
            Alpha = max(0, reward - np.random.normal(loc=0, scale=self.NoiseScale))

        if self.attackmethod == "none":
            Alpha = 0

        return Alpha

    def runAlgorithms(self, algorithms):
        """Simulate ``testing_iterations`` rounds for every algorithm.

        Args:
            algorithms: Mapping name -> algorithm object providing ``decide``,
                ``updateParameters``, ``getTheta``, ``CanEstimateUserPreference``
                and, for the "basis_vector" action set, ``getNum``.

        Returns:
            ``[finalRegret, BatchCumlateRegret, PullNum, tim_]``: final
            cumulative regret per algorithm, the cumulative-regret curve per
            algorithm, pull counts indexed by article id, and the recorded
            iteration indices.

        Raises:
            ValueError: If no article has ``id == TargetId``.
        """
        tim_ = []
        BatchCumlateRegret = {}
        CumRegret = {}
        ThetaDiffList = {}
        ThetaDiff = {}
        MeanEstimate = {}
        PlayedNumber = {}
        alpha = {}
        attacksum = {}
        PullNum = {}

        # Locate the attacker's target (the last match wins, as before).
        TargetArticle = None
        for a in self.articles:
            if a.id == self.TargetId:
                TargetArticle = a
        if TargetArticle is None:
            raise ValueError("No article with id {0!r} (TargetId) was found.".format(self.TargetId))

        # Initialization
        userSize = len(self.users)
        for alg_name, alg in algorithms.items():
            MeanEstimate[alg_name] = np.zeros((self.testing_iterations, self.context_dimension))
            PlayedNumber[alg_name] = np.zeros((self.testing_iterations, self.context_dimension))
            alpha[alg_name] = -np.ones((self.testing_iterations, 2))
            attacksum[alg_name] = 0
            # Indexed by article id; sized by the context dimension as before.
            PullNum[alg_name] = np.zeros(self.context_dimension)

            # Running total instead of re-summing a growing list every round.
            CumRegret[alg_name] = 0
            BatchCumlateRegret[alg_name] = []
            if alg.CanEstimateUserPreference:
                ThetaDiffList[alg_name] = []

        for iter_ in range(self.testing_iterations):

            # prepare to record theta estimation error
            for alg_name, alg in algorithms.items():
                if alg.CanEstimateUserPreference:
                    ThetaDiff[alg_name] = 0

            for u in self.users:
                self.regulateArticlePool()
                noise = self.noise()

                # optimal reward for this user at this round
                OptimalReward, _ = self.GetOptimalReward(u, self.articlePool)
                OptimalReward += noise
                if self.actionset == "basis_vector":
                    OptimalReward = np.clip(OptimalReward, 0, 1)

                for alg_name, alg in algorithms.items():
                    pickedArticle = alg.decide(self.articlePool, u.id)
                    PullNum[alg_name][pickedArticle.id] += 1

                    Reward = self.getReward(u, pickedArticle) + noise
                    TargetReward = self.getReward(u, TargetArticle) + noise

                    if self.actionset == "basis_vector":
                        Reward = np.clip(Reward, 0, 1)
                        TargetReward = np.clip(TargetReward, 0, 1)

                    regret = OptimalReward - Reward  # pseudo regret, since noise is canceled out
                    if regret < 0:
                        print("negative regret")
                        sys.exit()

                    CumRegret[alg_name] = CumRegret[alg_name] + regret

                    # attack (no corruption unless an action set below applies)
                    Alpha = 0
                    if self.actionset == "basis_vector":
                        # Statistics as of the previous iteration.  At iter_ == 0
                        # the index -1 wraps to the last row, which is still
                        # all zeros, i.e. "nothing played yet".
                        prev = iter_-1
                        PlayedNumberDic = {}
                        MeanEstimateDic = {}
                        PlayedNumberDic['pickedarticleid'] = PlayedNumber[alg_name][prev, pickedArticle.id]
                        PlayedNumberDic['targetid'] = PlayedNumber[alg_name][prev, self.TargetId]
                        PlayedNumberDic['firstarm'] = PlayedNumber[alg_name][prev, 0]
                        MeanEstimateDic['pickedarticleid'] = MeanEstimate[alg_name][prev, pickedArticle.id]
                        MeanEstimateDic['targetid'] = MeanEstimate[alg_name][prev, self.TargetId]
                        MeanEstimateDic['firstarm'] = MeanEstimate[alg_name][prev, 0]
                        MeanEstimateDic['firstreward_of_firstarm'] = MeanEstimate[alg_name][0, 0]
                        optimalthetaId = self.getOptimalThetaId(u)

                        Alpha = self.AttackMethods_MAB(alg_name, Reward, iter_, pickedArticle.id, MeanEstimateDic, PlayedNumberDic, attacksum[alg_name], optimalthetaId, TargetReward)
                        alpha[alg_name][iter_, 0] = Alpha
                        alpha[alg_name][iter_, 1] = pickedArticle.id

                    if self.actionset == "random":
                        Alpha = self.AttackMethods_ContextualBandit(Reward, TargetReward, pickedArticle.id, attacksum[alg_name])
                        alpha[alg_name][iter_, 0] = Alpha
                        alpha[alg_name][iter_, 1] = pickedArticle.id

                    attacksum[alg_name] = attacksum[alg_name] + Alpha
                    Reward -= Alpha
                    # The learner only ever sees the corrupted reward.
                    alg.updateParameters(pickedArticle, Reward, u.id)

                    if self.actionset == "basis_vector":
                        MeanEstimate[alg_name][iter_, :] = alg.getTheta(u.id)  # mean-estimate vector after this round
                        PlayedNumber[alg_name][iter_, :] = alg.getNum(u.id)   # per-arm play counts after this round

                    # update parameter estimation record
                    if alg.CanEstimateUserPreference:
                        ThetaDiff[alg_name] += self.getL2Diff(u.theta, alg.getTheta(u.id))

            for alg_name, alg in algorithms.items():
                if alg.CanEstimateUserPreference:
                    ThetaDiffList[alg_name].append(ThetaDiff[alg_name]/userSize if userSize else 0)

            if iter_ % self.batchSize == 0:
                tim_.append(iter_)
                for alg_name in algorithms.keys():
                    BatchCumlateRegret[alg_name].append(CumRegret[alg_name])

        finalRegret = {}
        for alg_name in algorithms.keys():
            curve = BatchCumlateRegret[alg_name]
            # With zero iterations nothing was recorded; report zero regret.
            finalRegret[alg_name] = curve[-1] if curve else 0

        return [finalRegret, BatchCumlateRegret, PullNum, tim_]


if __name__ == '__main__':
    # Experiment driver: for every attack method and attack budget, run
    # `Trails` independent simulations, pickle the regret curves and plot
    # (a) regret versus iteration per budget and (b) final regret versus budget.

    def _str_to_bool(text):
        """Parse a command-line boolean; argparse's ``type=bool`` treats any non-empty string as True."""
        lowered = text.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")

    parser = argparse.ArgumentParser(description = '')
    parser.add_argument('--time', type=int, help='Set the iteration time.')
    parser.add_argument('--trails', type=int, help='Set the trails.')
    parser.add_argument('--mload', type=str, help='Set the path for loading the mean value data.')
    parser.add_argument('--aload', type=str, help='Set the path for loading the article data.')
    parser.add_argument('--save', type=str, help='Set the path to the file to save the experiment results.')
    parser.add_argument('--actionset',type=str, help='Set actionset to be basis vector in stochastic setting or random vector in contextual setting.')
    parser.add_argument('--info',type=str, help='Set the budget C to be known or unknown to the learner.')
    parser.add_argument('--robust_comparison', type=_str_to_bool, help='Decide whether we will compare different robust algorithms or not.')

    args = parser.parse_args()

    # Missing (or zero/empty) options fall back to the defaults below.
    testing_iterations = args.time if args.time else 5000
    Trails = args.trails if args.trails else 10
    meanload = args.mload if args.mload else "./Standard/SavedData/MeanData.txt"
    articleload = args.aload if args.aload else "./Standard/SavedData/ArticleData.txt"
    save = args.save if args.save else "./Standard/SavedData"

    actionset = args.actionset if args.actionset else "basis_vector"  # "basis_vector" or "random"
    if actionset not in ["basis_vector", "random"]:
        raise ValueError("Invalid actionset. It must be either 'basis_vector' or 'random'.")

    if actionset == "basis_vector":
        AttackMethods = ["oracle MAB", "junsun"]
    else:
        AttackMethods = ["oracle MAB", "Garcelon"]

    info = args.info if args.info else "known"  # "known" or "unknown"
    if info not in ["known", "unknown"]:
        raise ValueError("Invalid info. It must be either 'known' or 'unknown'.")

    # Default is True; an explicit False on the command line is now honoured.
    robust_comparison = True if args.robust_comparison is None else args.robust_comparison
    if not isinstance(robust_comparison, bool):
        raise ValueError("Invalid robust_comparison. It must be either True or False.")

    ## Load the Environment Data ##
    # SECURITY NOTE: pickle.load can execute arbitrary code stored in the file;
    # only point --mload / --aload at trusted data.
    with open(meanload, 'rb') as fbin:
        users = pickle.load(fbin)
    with open(articleload, 'rb') as fbin:
        articles = pickle.load(fbin)
    print("ground-truth mean",users[0].theta)
    result_dir = os.path.join(save, actionset + '_' + info + '_' + str(robust_comparison) + 'RoCompare')
    os.makedirs(result_dir, exist_ok=True)

    # Keep a copy of the inputs next to the results.
    shutil.copy(meanload, os.path.join(result_dir, 'MeanData.txt'))
    shutil.copy(articleload, os.path.join(result_dir, 'ArticleData.txt'))

    ## Environment Settings ##
    NoiseScale = 0.1  # standard deviation of Gaussian noise
    context_dimension = 5
    n_articles = 5
    poolArticleSize = 5
    if actionset == "basis_vector":
        Budget_set = [25*i for i in range(11)]  # attack budget
    else:
        Budget_set = [100*i for i in range(11)]
    targearmtid = poolArticleSize-1
    if actionset == "basis_vector":
        n_articles = context_dimension  # there can be at most context_dimension number of basis vectors

    # Budget handed to the robust learners: the true one when it is known,
    # otherwise a fixed guess that depends only on the horizon and dimension.
    if info == "known":
        alg_Budget_set = Budget_set
        alg_Budget_RUCB_set = Budget_set
    else:
        if actionset == "random":
            alg_Budget_set = [np.sqrt(testing_iterations)]*len(Budget_set)
        else:
            alg_Budget_set = [np.sqrt(testing_iterations * np.log(context_dimension)/context_dimension)]*len(Budget_set)
        beta = 0.35
        alg_Budget_RUCB_set = [np.sqrt(beta*testing_iterations * np.log(testing_iterations)/context_dimension)]*len(Budget_set)

    if actionset == "basis_vector":
        tracked_names = ['UCB', 'RUCB', 'epsilon', 'TS', 'RTS']
    else:
        tracked_names = ['LinUCB', 'CWOFUL', 'LinTS', 'RobustLinTS']

    # Per-trial regret curves (overwritten for every budget) and per-trial
    # final regret for every budget.
    BatchCumlateRegret = {name: np.zeros((Trails, testing_iterations)) for name in tracked_names}
    total_regret = {name: np.zeros((Trails, len(Budget_set))) for name in tracked_names}

    ##define color of algorithm to present##
    algcolor = {}
    algcolor["RTS"] = algcolor["RobustLinTS"] = 'brown'
    algcolor["TS"] = algcolor["LinTS"] = (0, 0, 0.5)
    algcolor["UCB"] = algcolor["LinUCB"]  = 'black'
    algcolor["CWOFUL"] = algcolor['RUCB'] = 'gray'

    for attack in AttackMethods:

        for i in range(len(Budget_set)):
            print(Budget_set[i])

            for k in range(Trails):
                simExperiment_attack = simulateOnlineData(context_dimension=context_dimension,
                                                testing_iterations=testing_iterations,
                                                articles=articles,
                                                users=users,
                                                attackmethod = attack,
                                                TargetId = targearmtid,
                                                budget = Budget_set[i],
                                                actionset = actionset,
                                                noise=lambda: np.random.normal(scale=NoiseScale),
                                                NoiseScale=NoiseScale,
                                                poolArticleSize=poolArticleSize
                                                )

                ## Initiate Bandit Algorithms ##
                algorithms = {}
                if actionset == "basis_vector":
                    algorithms['UCB'] = UCB(num_arm=n_articles, NoiseScale=NoiseScale)
                    if robust_comparison:
                        algorithms['RUCB'] = RUCB(num_arm=n_articles, NoiseScale=NoiseScale, budget = alg_Budget_RUCB_set[i])
                    algorithms['TS'] = TS(num_arm=n_articles, NoiseScale=NoiseScale, budget = Budget_set[i])
                    algorithms['RTS'] = RTS(num_arm=n_articles, NoiseScale=NoiseScale, budget = alg_Budget_set[i])
                else:
                    lambda_ = 1
                    delta__ = 1e-1

                    # A zero budget would divide by zero, so it maps to an infinite alpha.
                    alpha_RobustLinTS = np.sqrt(context_dimension)/alg_Budget_set[i] if alg_Budget_set[i]!=0 else float('inf')
                    alpha_CWOFUL = (NoiseScale * np.sqrt(context_dimension) + np.sqrt(lambda_))/alg_Budget_set[i] if alg_Budget_set[i]!=0 else float('inf')

                    algorithms['LinUCB'] = LinUCB(dimension=context_dimension, alpha=-1, lambda_=lambda_, delta_=delta__, NoiseScale=NoiseScale)
                    if robust_comparison:
                        algorithms['CWOFUL'] = CWOFUL(dimension=context_dimension, beta=-1, lambda_=lambda_, delta_=delta__, NoiseScale=NoiseScale, Iteration = testing_iterations, context_dimension = context_dimension, alpha = alpha_CWOFUL, budget = alg_Budget_set[i])
                    algorithms['LinTS'] = LinTS(dimension=context_dimension, NoiseScale=NoiseScale, lambda_=lambda_)
                    algorithms['RobustLinTS'] = RobustLinTS(dimension=context_dimension, NoiseScale=NoiseScale, lambda_=lambda_, alpha = alpha_RobustLinTS)

                ## Run Simulation ##
                [Regret, batchcumlateRegret, PullNum, tim_] = simExperiment_attack.runAlgorithms(algorithms)

                for alg_name in algorithms.keys():
                    total_regret[alg_name][k,i] = Regret[alg_name]
                    BatchCumlateRegret[alg_name][k,:] = batchcumlateRegret[alg_name]

            ## Plotting ##
            # NOTE: these per-budget figures stay open until plt.show() below.
            f, axa = plt.subplots(figsize=(8, 6))

            save_path = os.path.join(result_dir, attack + '_' + str(Budget_set[i]) + '.pkl')
            os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Create directories if they don't exist
            with open(save_path, 'wb') as file:
                pickle.dump(BatchCumlateRegret, file)

            for alg_name in algorithms.keys():
                BatchCumlateRegretMU = BatchCumlateRegret[alg_name].mean(axis=0)
                BatchCumlateRegretSTD = BatchCumlateRegret[alg_name].std(axis=0)

                # Plot about ten evenly spaced points; the step is clamped to 1
                # so horizons shorter than ten iterations do not yield a zero step.
                length = len(BatchCumlateRegretMU)
                interval = max(1, length // 10)
                indices = np.arange(0,length, interval)

                err = BatchCumlateRegretSTD[::interval]
                regretmean = BatchCumlateRegretMU[::interval]

                axa.errorbar(indices, regretmean, yerr = err, capsize = 4, label = alg_name, fmt = '-o',color = algcolor[alg_name], linewidth = 1)

            axa.legend(loc='upper left',prop={'size':14})
            axa.set_xlabel("Iteration",fontsize=18)
            axa.set_ylabel("Average Regret",fontsize=18)
            axa.set_title(f'C {info} \nAttack Method: {attack}, Budget: {Budget_set[i]}', weight='bold', loc='left',fontsize=18)
            axa.set_facecolor('#E6E6E6')
            axa.xaxis.grid(False)
            axa.yaxis.grid(True, color='white')
            axa.tick_params(axis='both', which='major', labelsize=18)

            axa.spines['top'].set_color('none')
            axa.spines['right'].set_color('none')
            axa.spines['left'].set_color('none')

            plt.savefig(os.path.join(result_dir, f'Average_Accumulated_Regret_Under_Attack_Method_{attack}_and_Budget_{Budget_set[i]}_with_Iteration_{testing_iterations}.png'))

        ## Plotting ##
        f, axa = plt.subplots(figsize=(8, 6))

        save_path = os.path.join(result_dir, attack + ' total_regret'  + '.pkl')
        os.makedirs(os.path.dirname(save_path), exist_ok=True)  # Create directories if they don't exist
        with open(save_path, 'wb') as file:
            pickle.dump(total_regret, file)

        # Number of trailing budgets drawn as detached points (0 disables it).
        DiscretePointsNum = 0
        if len(Budget_set) < DiscretePointsNum:
            DiscretePointsNum = 0

        # x positions of the budgets; identical for every algorithm.
        if DiscretePointsNum != 0:
            x_index = list(range(1,len(Budget_set)+1-DiscretePointsNum)) + list(np.arange(len(Budget_set)-DiscretePointsNum+2/DiscretePointsNum,len(Budget_set)+2-DiscretePointsNum+2/DiscretePointsNum,2/DiscretePointsNum))
        else:
            x_index = list(range(1,len(Budget_set)+1))

        for alg_name in algorithms.keys():
            total_regretMU = total_regret[alg_name].mean(axis=0)
            total_regretSTD = total_regret[alg_name].std(axis=0)

            if DiscretePointsNum != 0:
                total_regretMU_withline = total_regretMU[:-DiscretePointsNum]
                total_regretMU_discrete = total_regretMU[-DiscretePointsNum:]
                total_regretSTD_withline = total_regretSTD[:-DiscretePointsNum]
                total_regretSTD_discrete = total_regretSTD[-DiscretePointsNum:]
                axa.errorbar(x_index[:-DiscretePointsNum], total_regretMU_withline, yerr = total_regretSTD_withline, capsize = 4, label = alg_name, fmt = '-o',color = algcolor[alg_name],linewidth = 1)
                axa.errorbar(x_index[-DiscretePointsNum:], total_regretMU_discrete, yerr = total_regretSTD_discrete, capsize = 4, fmt = '-o',color = algcolor[alg_name],linestyle='None')
            else:
                axa.errorbar(x_index, total_regretMU, yerr = total_regretSTD, capsize = 4, label = alg_name, fmt = '-o',color = algcolor[alg_name],linewidth = 1)

        axa.legend(loc='upper left',prop={'size':14})
        axa.set_xlabel("Adversarial Budget", fontsize=18)
        axa.set_ylabel("Average Regret",fontsize=18)
        axa.set_title(f'C {info}, Attack Method: {attack}', weight='bold', loc='left', fontsize=18)
        axa.set_facecolor('#E6E6E6')
        axa.xaxis.grid(False)
        axa.yaxis.grid(True, color='white')
        axa.tick_params(axis='both', which='major', labelsize=18)
        # Label four ticks (first, one third, two thirds, last) with the real budgets.
        axa.set_xticks([x_index[0], x_index[len(x_index)//3], x_index[2*len(x_index)//3], x_index[-1]])
        axa.set_xticklabels([Budget_set[0], Budget_set[len(Budget_set)//3], Budget_set[2*len(Budget_set)//3], Budget_set[-1]])
        axa.spines['top'].set_color('none')
        axa.spines['right'].set_color('none')
        axa.spines['left'].set_color('none')
        plt.savefig(os.path.join(result_dir, f'Average_Accumulated_Regret_Under_Attack_Method_{attack}.png'))
        plt.show()