# import re
# import matplotlib.pyplot as plt

# def read_ppl_values(filename):
#     """Reads PPL values from the given training output file, limited to 2000 iterations."""
#     with open(filename, 'r') as file:
#         file_content = file.read()
#     # Using regex to extract iteration and PPL values from validation lines
#     pattern = r"validation loss at iteration (\d+) \| lm loss value: [0-9.E+-]+ \| lm loss PPL: ([0-9.E+-]+) \|"
#     matches = re.findall(pattern, file_content)
#     iterations = []
#     ppls = []
#     for match in matches:
#         iteration = int(match[0])
#         if iteration <= 2000:  # Limit to 2000 iterations
#             iterations.append(iteration)
#             ppls.append(float(match[1]))

#         # iterations.append(iteration)
#         # ppls.append(float(match[1]))
#     return iterations, ppls

# # File names as provided, and mapping them to optimizer names
# files_and_labels = {
#     "gpt-alto-1e-2.out": "ALTO",
#     "gpt-lion-5e-5.out": "LION"
# }

# plt.figure(figsize=(10, 6))

# for filename, label in files_and_labels.items():
#     iterations, ppls = read_ppl_values(filename)
#     plt.plot(iterations, ppls, marker='o', linestyle='-', label=label)

# # plt.title('PPL Variation over First 2000 Iterations on 345M GPT with GPT-2-Output-Dataset')
# plt.xlabel('Iteration')
# plt.ylabel('PPL')
# plt.legend()
# plt.grid(True)
# plt.savefig("training_ppl_plot_ours_lion.pdf")  # Save the plot as a PDF file


import re
import matplotlib.pyplot as plt

def read_ppl_values(filename, max_iteration=2000):
    """Read validation perplexity (PPL) values from a training output file.

    The file is scanned for entries of the form
    ``validation loss at iteration <N> | lm loss value: <X> | lm loss PPL: <Y> |``
    and every entry whose iteration number does not exceed ``max_iteration``
    is kept, in the order it appears in the file.

    Args:
        filename: Path of the training log to parse.
        max_iteration: Largest iteration number to keep (inclusive). Defaults
            to 2000; pass ``None`` to keep every validation entry.

    Returns:
        A ``(iterations, ppls)`` tuple of two equally long lists: the
        iteration numbers as ``int`` and the matching PPL values as ``float``.
        Both lists are empty when the file contains no matching entry.
    """
    # Regex extracting the iteration number and the PPL value from validation lines.
    pattern = re.compile(
        r"validation loss at iteration (\d+) \| lm loss value: [0-9.E+-]+ \| lm loss PPL: ([0-9.E+-]+) \|"
    )
    iterations = []
    ppls = []
    with open(filename, 'r') as file:
        # The pattern cannot match across a newline, so scanning line by line
        # finds the same entries as scanning the whole text at once, without
        # holding a potentially large log in memory.
        for line in file:
            for match in pattern.finditer(line):
                iteration = int(match.group(1))
                if max_iteration is not None and iteration > max_iteration:
                    continue
                iterations.append(iteration)
                ppls.append(float(match.group(2)))

    return iterations, ppls

# Training logs to compare: each key is a log file name, each value is the
# optimizer name shown for that curve in the legend.
files_and_labels = {
    "gpt-alto-1e-2.out": "ALTO",
    "gpt-lion-5e-5.out": "LION",
}

plt.figure(figsize=(10, 6))

# Draw one PPL-vs-iteration curve per log file on the shared figure.
for log_name, optimizer_name in files_and_labels.items():
    steps, perplexities = read_ppl_values(log_name)
    plt.plot(steps, perplexities, marker='o', linestyle='-', label=optimizer_name)

# Both axis labels use the same bold, enlarged font.
axis_label_style = {'fontsize': 14, 'fontweight': 'bold'}
plt.xlabel('Iteration', **axis_label_style)
plt.ylabel('PPL', **axis_label_style)

# The legend text is deliberately larger than the axis labels.
plt.legend(fontsize=25)

plt.grid(True)

# The output format (PDF) is taken from the file extension.
plt.savefig("training_ppl_plot_ours_lion.pdf")
