import argparse
import numpy as np
import time
from tqdm import tqdm

import Algorithm
import Bandits


def parse_args():
    """
    Specifies command line arguments for the program.
    """
    parser = argparse.ArgumentParser(description='Best arm identification')

    parser.add_argument('--seed', default=1, type=int,
                        help='Seed for random number generators')
    # default best-arm options
    parser.add_argument('--n', default=100, type=int,
                        help='number of total arms')
    parser.add_argument('--delta', default=0.1, type=float,
                        help='1 - target confidence')
    parser.add_argument('--method', default='TrackandStop',
                        help='method')
    parser.add_argument('--num_sim', default=100, type=int,
                        help='number of total simulation')

    return parser.parse_args()


if __name__ == '__main__':
    delta_list = [1e-1, 1e-2, 1e-3, 1e-4]
    l3_list = [180, 250, 320, 390]
    for k in range(len(delta_list)):
        delta = delta_list[k]
        args = parse_args()
        np.random.seed(args.seed)
        np.set_printoptions(threshold=np.inf)
        num_arms = args.n
        epsilon = 0.01
        print('epsilon:', epsilon)
        alpha = 1.001
        L1 = int(np.ceil(60 * np.sqrt(np.log(1 / delta)))) + 40
        print('L1:', L1)
        L2 = int(np.ceil(30 * np.log(1 / delta))) + 30
        print('L2:', L2)
        L3 = l3_list[k]
        print('L3:', L3)
        num_error1 = 0
        num_error2 = 0
        num_error3 = 0
        total_sample1 = 0
        total_sample2 = 0
        total_sample3 = 0
        num_condition1 = 0
        # uniform
        arm_means_uniform = np.random.uniform(0.2, 0.4, num_arms)
        arm_means_uniform[0] = 0.5
        print('arm_means_uniform:', arm_means_uniform)
        # normal
        arm_means_normal = np.random.normal(0.2, 0.2, num_arms)
        for i in range(num_arms):
            mean = arm_means_normal[i]
            while mean > 0.4 or mean < 0:
                mean = np.random.normal(0.2, 0.2)
            arm_means_normal[i] = mean
        arm_means_normal[0] = 0.6
        print('arm_means_normal:', arm_means_normal)
        time_list = []
        sample_list = []
        batch_list = []
        num_condition = 0
        num_error = 0
        for i in tqdm(range(args.num_sim)):
            start_time = time.time()
            # these parameters can also be set individually for each arm

            sim = Bandits.Simulator(num_arms=num_arms, arm_means=arm_means_uniform)
            total_sample, mu, best_arm, condition, batch = Algorithm.Tri_BBAI(epsilon, delta, L1, L2, L3, alpha, sim)
            if (condition):
                num_condition += 1
            if (best_arm != 1):
                num_error += 1
            sample_list.append(total_sample)
            batch_list.append(batch)
            end_time = time.time()
            time_list.append(end_time - start_time)
        print('average_condition:', num_condition / args.num_sim)
        print('average_error:', num_error / args.num_sim)
        print('Runtime Mean:', np.mean(time_list))
        print('Runtime Std:', np.std(time_list))
        print('Sample Mean:', np.mean(sample_list))
        print('Sample std:', np.std(sample_list))
        print('Batch Mean:', np.mean(batch_list))
        print('Batch Std:', np.std(batch_list))
