import os
import csv
import re
import numpy as np
import matplotlib.pyplot as plt

# Specify directory paths, T value, and output file paths
base_path1 = 'directory_path_1'  # Replace with the actual directory path 1
base_path2 = 'directory_path_2'  # Replace with the actual directory path 2
T = 30  # Specify the value of T
output_path = 'output_directory_path'  # Replace with the desired output directory path


def numeric_sort_key(file_name):
    """Return a sort key that orders file names by their first embedded integer.

    Names containing digits sort first, by the integer value of their first
    digit run (so ``x2.csv`` precedes ``x10.csv``). Names without any digits
    sort after them, alphabetically, instead of raising ``IndexError``.
    """
    match = re.search(r'\d+', file_name)
    if match is None:
        return (1, file_name)
    return (0, int(match.group()))


def list_seed_folders(base_path):
    """Return the sorted names of sub-directories of ``base_path`` starting with 'seed'.

    Plain files whose name happens to start with 'seed' are ignored, and the
    result is sorted so that the "first" seed folder is the same on every run
    (``os.listdir`` order is arbitrary).
    """
    return sorted(
        entry
        for entry in os.listdir(base_path)
        if entry.startswith('seed') and os.path.isdir(os.path.join(base_path, entry))
    )


def list_csv_files(base_path, seed_folders):
    """Return the '.csv' file names of the first seed folder, numerically sorted.

    The first folder in ``seed_folders`` is used as the reference; every other
    seed folder is expected to contain files with the same names.

    Raises:
        FileNotFoundError: if ``seed_folders`` is empty.
    """
    if not seed_folders:
        raise FileNotFoundError(f"No 'seed*' folders found in {base_path}")
    reference_dir = os.path.join(base_path, seed_folders[0])
    csv_names = [name for name in os.listdir(reference_dir) if name.endswith('.csv')]
    csv_names.sort(key=numeric_sort_key)
    return csv_names


def probability_at_t(file_path, t):
    """Return the second-column value of the first row whose first column equals ``t``.

    The first line of the file is treated as a header and skipped. Blank rows
    are ignored. Returns ``None`` when no row matches (including when the file
    is empty).
    """
    with open(file_path, 'r', newline='') as file:
        csv_reader = csv.reader(file)
        next(csv_reader, None)  # Skip header (tolerate a completely empty file)
        for row in csv_reader:
            # csv.reader yields [] for blank lines; skip them instead of crashing.
            if row and int(row[0]) == t:
                return float(row[1])
    return None


def average_probabilities(base_path, seed_folders, file_names, t):
    """Average the value at step ``t`` across all seed folders, per file name.

    Returns a dict mapping each entry of ``file_names`` (in the given order)
    to the mean of the values found in ``base_path/<seed>/<file_name>``.
    Seeds whose file has no row for ``t`` are left out of that mean; if no
    seed has such a row the average is recorded as NaN and a warning is
    printed, rather than dividing by zero.

    A file that is missing from one of the seed folders still raises
    ``FileNotFoundError``.
    """
    results = {}
    for file_name in file_names:
        probabilities = []
        for seed_folder in seed_folders:
            file_path = os.path.join(base_path, seed_folder, file_name)
            value = probability_at_t(file_path, t)
            if value is not None:
                probabilities.append(value)
        if probabilities:
            results[file_name] = sum(probabilities) / len(probabilities)
        else:
            print(f"Warning: no row with T={t} found for {file_name} under {base_path}")
            results[file_name] = float('nan')
    return results


def spread_of_averages(results):
    """Return the population standard deviation of the per-file averages.

    NOTE: this is the spread of the averages *across files* (``np.std`` with
    its default ``ddof=0``), not the spread across seeds. NaN averages are
    excluded; NaN is returned when nothing is left.
    """
    values = [value for value in results.values() if not np.isnan(value)]
    if not values:
        return float('nan')
    return np.std(values)


def draw_plot(results1, results2, t):
    """Draw both series of per-file averages on a new matplotlib figure."""
    plt.figure(figsize=(10, 6))
    # Materialise the dict views as lists so matplotlib receives plain sequences.
    plt.plot(range(len(results1)), list(results1.values()), marker='o', label='PCPO')
    plt.plot(range(len(results2)), list(results2.values()), marker='o', label='CPO')
    plt.xlabel('File Index')
    plt.ylabel('Average Probability')
    plt.title(f'Average Probability for T={t}')
    # Tick labels come from the first series only.
    plt.xticks(range(len(results1)), list(results1.keys()), rotation=45)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()


def write_results_csv(output_file, results1, results2, std_dev1, std_dev2):
    """Write one row per file of ``results1`` with both averages and both std devs.

    The two standard deviations are repeated on every row. A file that has no
    entry in ``results2`` gets an empty cell instead of raising ``KeyError``.
    """
    with open(output_file, 'w', newline='') as file:
        csv_writer = csv.writer(file)
        # NOTE(review): the plot legend labels results1 'PCPO' and results2 'CPO',
        # while this header labels them 'CPO' and 'Flipped'. One of the two is
        # presumably mislabeled -- confirm which before relying on the names.
        csv_writer.writerow(['File Name', 'CPO Average Probability', 'Flipped Average Probability', 'CPO Standard Deviation', 'Flipped Standard Deviation'])
        for file_name, average1 in results1.items():
            csv_writer.writerow([file_name, average1, results2.get(file_name, ''), std_dev1, std_dev2])


def main():
    """Aggregate both directories, then save the plot and the CSV summary."""
    # Get all seed folders
    seed_folders1 = list_seed_folders(base_path1)
    seed_folders2 = list_seed_folders(base_path2)

    # Get the file names from the first seed folder as a reference
    file_names1 = list_csv_files(base_path1, seed_folders1)
    file_names2 = list_csv_files(base_path2, seed_folders2)

    results1 = average_probabilities(base_path1, seed_folders1, file_names1, T)
    results2 = average_probabilities(base_path2, seed_folders2, file_names2, T)

    # Calculate standard deviations
    std_dev1 = spread_of_averages(results1)
    std_dev2 = spread_of_averages(results2)

    # Create the output directory BEFORE anything is written into it;
    # savefig does not create missing directories.
    os.makedirs(output_path, exist_ok=True)

    # Save the plot to a file
    draw_plot(results1, results2, T)
    plot_output_file = os.path.join(output_path, f"average_probability_T{T}.png")
    plt.savefig(plot_output_file)
    print(f"Plot saved to {plot_output_file}")

    # Save the results to a CSV file in the specified output path
    output_file = os.path.join(output_path, 'output_file.csv')
    write_results_csv(output_file, results1, results2, std_dev1, std_dev2)
    print(f"Results saved to {output_file}")

    # Show the window last: plt.show() can block until the window is closed,
    # and both output files should already be on disk by then.
    plt.show()


if __name__ == "__main__":
    main()