import os
import pandas as pd
import re

# Directory containing result files
RESULTS_DIR = 'results/'

# Accumulates one record (dict) per parsed result line, across all files
data = []

# Regex pattern to extract dataset and model from filename.
# The model group cannot contain underscores, so everything between
# "result_" and the last underscore is captured as the dataset.
filename_pattern = re.compile(r"^result_(?P<dataset>[A-Za-z0-9_]+)_(?P<model>[A-Za-z0-9\-\.]+)\.txt$")

# Column order of the output table. Passing it to the DataFrame explicitly
# keeps the CSV header present even when no result line was found.
RESULT_COLUMNS = ['dataset', 'model', 'length', 'N', 'accuracy']


def _parse_result_line(line):
    """Parse one result line into a ``(length, N, acc)`` tuple.

    The line is expected to look like ``"length: 0, N: 3, acc: 0.85"``.
    Fields are read by position (first, second, third comma-separated
    item); the text before each colon is not inspected.

    Raises:
        IndexError: If the line has fewer than three comma-separated
            fields or a field has no ``:`` separator.
        ValueError: If a value cannot be converted to ``int``/``float``.
    """
    parts = line.strip().split(',')
    length = int(parts[0].split(':')[1].strip())
    n = int(parts[1].split(':')[1].strip())
    acc = float(parts[2].split(':')[1].strip())
    return length, n, acc


# Iterate over all files in the results directory. os.listdir() returns
# entries in arbitrary order, so sort them to make the row order of the
# output reproducible across runs and platforms.
for filename in sorted(os.listdir(RESULTS_DIR)):
    match = filename_pattern.match(filename)
    if not match:
        # Not a result file; ignore it.
        continue

    dataset = match.group("dataset")
    model = match.group("model")
    filepath = os.path.join(RESULTS_DIR, filename)

    # Explicit encoding so parsing does not depend on the locale default.
    with open(filepath, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            # Blank lines (e.g. a trailing empty line) carry no data and
            # would otherwise crash the positional parser.
            if not line.strip():
                continue

            try:
                length, N, acc = _parse_result_line(line)
            except (IndexError, ValueError) as err:
                # Re-raise with the location so a bad file is easy to find.
                raise ValueError(
                    f"{filepath}:{line_number}: cannot parse result line "
                    f"{line.strip()!r}"
                ) from err

            data.append({
                'dataset': dataset,
                'model': model,
                'length': length,
                'N': N,
                'accuracy': acc,
            })

# Create a DataFrame from the collected data
df = pd.DataFrame(data, columns=RESULT_COLUMNS)

# Optional: Save the processed data for future use
df.to_csv('processed_results.csv', index=False)
