# import matplotlib.pyplot as plt
# import pandas as pd
# import numpy as np
# import re
# import matplotlib.colors as mcolors
# import matplotlib.cm as cm

# def parse_text(text):
#     # Use regex to extract the interto value, percentage, and mse
#     pattern = r"interto(\d+).*?_p(\d+).*?mse:(\d+\.\d+)"
#     matches = re.findall(pattern, text, re.DOTALL)
#     final_list = [(int(m[0]), int(m[1]), float(m[2])) for m in matches]
#     new_final_list = []
#     find_flag = False
#     mse_base = dict()
#     for answer_tuple in final_list:
#         if answer_tuple[0] < 336 and answer_tuple[0] != 8:
#             new_final_list.append(answer_tuple)
#         if answer_tuple[0] == 336:
#             find_flag = True
#             mse_base[str(answer_tuple[1])] = answer_tuple[2]
#     assert find_flag == True
#     new_final_list = [(interto, percent, mse - mse_base[str(percent)]) for interto, percent, mse in new_final_list]
#     return new_final_list

# def read_data(file_path):
#     with open(file_path, 'r') as file:
#         text = file.read()
#     results = parse_text(text)
#     return pd.DataFrame(results, columns=['Interto', 'Percentage', 'MSE'])

# def plot_data(df):
#     # Setup the colormap
#     norm = mcolors.LogNorm(vmin=df['Interto'].min(), vmax=df['Interto'].max())
#     scalar_map = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

#     plt.figure(figsize=(10, 6))
#     grouped = df.groupby('Interto')
    
#     for interto, group in grouped:
#         group.sort_values('Percentage', inplace=True)
#         color = scalar_map.to_rgba(interto)
#         # plt.plot(group['Percentage'], group['MSE'], label=f'Interto {interto}', marker='o', markersize=4, color=color)
#         # use log scale in y-axis.
#         plt.plot(group['Percentage'], group['MSE'], label=f'Interto {interto}', marker='o', markersize=4, color=color)
        
    
#     plt.title('MSE(inter_to,percent) - MSE(336,percent) by Interto and Percentage: log scale')
#     plt.xlabel('Percentage of Data Used')
#     plt.ylabel('MSE')
    
#     # Colorbar with custom ticks
#     cbar = plt.colorbar(scalar_map, label='Interto')
#     tick_locs = np.unique(df['Interto'])  # Unique interto values
#     cbar.set_ticks(tick_locs)
#     cbar.set_ticklabels(tick_locs)
    
#     plt.grid(True)
#     plt.savefig("041701_traffic_diff336t192.png")

# if __name__ == "__main__":
#     file_path = 'newresult_traffic.txt'
#     df = read_data(file_path)
#     plot_data(df)


import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import re
import matplotlib.colors as mcolors
import matplotlib.cm as cm

def parse_text(text, baseline=768, min_interto=32, max_interto=768,
               excluded=(336, 512)):
    """Parse a results log into baseline-relative MSE records.

    Every occurrence of ``interto<N> ... _p<P> ... mse:<X.Y>`` in *text* is
    read as one record ``(N, P, X.Y)``.  Records whose interto equals
    *baseline* supply the reference MSE for their percentage; the retained
    records are returned with that reference subtracted from their MSE.

    Args:
        text: Raw contents of the results log.
        baseline: Interto value whose MSE is subtracted from the others.
        min_interto: Smallest interto value to keep (inclusive).
        max_interto: Largest interto value to keep (inclusive).
        excluded: Interto values to drop even if they fall inside the range.

    Returns:
        A list of ``(interto, percentage, mse - baseline_mse)`` tuples in the
        order the records appear in *text*.  With the default arguments the
        baseline rows themselves are retained (with a difference of 0.0).

    Raises:
        ValueError: If *text* contains no record for the baseline interto.
        KeyError: If a retained record's percentage has no baseline record.
    """
    # DOTALL lets the lazy ``.*?`` gaps cross line breaks, so the three
    # fields of a single record may sit on different lines of the log.
    pattern = r"interto(\d+).*?_p(\d+).*?mse:(\d+\.\d+)"
    records = [
        (int(interto), int(percent), float(mse))
        for interto, percent, mse in re.findall(pattern, text, re.DOTALL)
    ]

    # Reference MSE per percentage; if the log repeats a baseline record the
    # later occurrence wins.
    baseline_mse = {
        percent: mse
        for interto, percent, mse in records
        if interto == baseline
    }
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if not baseline_mse:
        raise ValueError(
            f"no baseline records (interto == {baseline}) found in text"
        )

    skipped = set(excluded)
    kept = [
        record for record in records
        if min_interto <= record[0] <= max_interto and record[0] not in skipped
    ]

    # Report every percentage lacking a baseline at once, rather than failing
    # on the first lookup with an unexplained key.
    missing = sorted({percent for _, percent, _ in kept} - baseline_mse.keys())
    if missing:
        raise KeyError(
            f"no baseline (interto == {baseline}) MSE for percentage(s) {missing}"
        )

    return [
        (interto, percent, mse - baseline_mse[percent])
        for interto, percent, mse in kept
    ]

def read_data(file_path):
    """Read the results log at *file_path* and tabulate its parsed records.

    The file is read in full as text and handed to ``parse_text``; the
    records it returns become the rows of a DataFrame with the columns
    ``Interto``, ``Percentage`` and ``MSE``.
    """
    column_names = ['Interto', 'Percentage', 'MSE']
    with open(file_path, 'r') as handle:
        raw_text = handle.read()
    return pd.DataFrame(parse_text(raw_text), columns=column_names)

def plot_data(df, output_path="042401_traffic_diff512t192.png",
              title='MSE(inter_to,percent) - MSE(768,percent) '
                    'by Interto and Percentage'):
    """Plot MSE against percentage, one line per interto value, and save it.

    Args:
        df: DataFrame with the columns ``Interto``, ``Percentage`` and
            ``MSE``.
        output_path: Where the rendered figure is written.
        title: Figure title.  The default names 768 as the baseline, which is
            the default baseline subtracted by ``parse_text`` in this file,
            and no longer claims a log scale because the y-axis is linear.

    Returns:
        None.  The figure is saved to *output_path* and then closed.
    """
    # Colour each line by its interto value on a logarithmic colour scale.
    norm = mcolors.LogNorm(vmin=df['Interto'].min(), vmax=df['Interto'].max())
    scalar_map = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

    fig, ax = plt.subplots(figsize=(10, 6))

    for interto, group in df.groupby('Interto'):
        # Sort into a new frame: sorting a groupby slice in place can raise
        # SettingWithCopyWarning.
        ordered = group.sort_values('Percentage')
        ax.plot(
            ordered['Percentage'],
            ordered['MSE'],
            label=f'Interto {interto}',
            marker='o',
            markersize=4,
            color=scalar_map.to_rgba(interto),
        )

    ax.set_title(title)
    ax.set_xlabel('Percentage of Data Used')
    ax.set_ylabel('MSE')
    # Linear y-axis with a fixed window; values may be negative because they
    # are differences from the baseline.
    ax.set_ylim(-0.01, 0.15)

    # The mappable is not attached to any Axes, so the Axes to take space
    # from must be given explicitly; recent Matplotlib raises otherwise.
    cbar = fig.colorbar(scalar_map, ax=ax, label='Interto')
    tick_locs = np.unique(df['Interto'])  # one tick per distinct interto
    cbar.set_ticks(tick_locs)
    cbar.set_ticklabels(tick_locs)

    ax.grid(True)
    ax.legend()
    fig.savefig(output_path)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

if __name__ == "__main__":
    # Script entry point: load the results log into a DataFrame, then render
    # and save the plot.
    plot_data(read_data('newresult_traffic_512_192.txt'))
