import os
import json
import argparse
from scipy import stats
import numpy as np
from KnowledgeSynapticNetwork.utils import combine_data_from_jsonl_files
def perform_t_test(cr_values, hypothetical_mean):
    """Run a one-sample t-test on ``cr_values`` and print the outcome.

    The sample is tested against ``hypothetical_mean`` with
    ``scipy.stats.ttest_1samp``. The result is printed and also returned so
    callers can use it programmatically.

    Args:
        cr_values: Sequence of numeric observations.
        hypothetical_mean: Population mean assumed under the null hypothesis.

    Returns:
        A ``(t_statistic, p_value)`` tuple. Both are NaN when fewer than two
        observations are supplied, because the test is undefined there.
    """
    if np.size(cr_values) < 2:
        # With 0 or 1 observations the sample variance is undefined; report
        # NaN explicitly rather than relying on SciPy's warning-laden path.
        t_stat = p_value = float("nan")
    else:
        t_stat, p_value = stats.ttest_1samp(cr_values, hypothetical_mean)
    print(f"T-statistic: {t_stat}, P-value: {p_value}")
    return t_stat, p_value

def read_cr_values_from_file(file_path):
    """Collect every ``consistency_ratio`` stored in a JSON Lines file.

    Each non-blank line must be a JSON object mapping an identifier to a
    record dict that holds a ``'consistency_ratio'`` key.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A list of the ``consistency_ratio`` values in file order, and within
        each line in the order of that object's keys.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'consistency_ratio'`` key.
    """
    cr_values = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline) are not JSON documents;
            # skip them rather than letting json.loads fail.
            if not line.strip():
                continue
            data_entry = json.loads(line)
            cr_values.extend(
                info['consistency_ratio'] for info in data_entry.values()
            )
    return cr_values

if __name__ == "__main__":
    # Script entry point: obtain the consistency ratios (CRs) that fall below
    # --cr_threshold, either from a previously written output file or by
    # filtering the combined neuron results, then run a one-sample t-test of
    # those CRs against the threshold.
    parser = argparse.ArgumentParser()
    parser.add_argument('--neurons_results_dir_to_save', default='ResDebug/0406/neurons_GPT2')
    parser.add_argument('--cr_threshold', type=float, default=0.05)  # Ensure correct type for threshold
    parser.add_argument('--output_file_path', default='ResDebug/0406/neurons_GPT2/filtered_cr_data.jsonl')
    args = parser.parse_args()

    # Check if the output file already exists
    if os.path.exists(args.output_file_path):
        # File exists, read CR values directly from file. The threshold is
        # re-applied so that a cache holding extra records (e.g. one written
        # with a looser threshold) cannot leak non-qualifying CRs into the
        # test. NOTE: a cache written with a *stricter* threshold is still
        # incomplete; delete the file to force a rebuild.
        cached_crs = read_cr_values_from_file(args.output_file_path)
        filtered_crs = [cr for cr in cached_crs if cr < args.cr_threshold]
    else:
        # File does not exist, process data and save filtered results
        combined_data = combine_data_from_jsonl_files(args.neurons_results_dir_to_save)
        filtered_crs = []

        # Make sure the destination directory exists before opening the file.
        output_dir = os.path.dirname(args.output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(args.output_file_path, 'w') as output_file:
            for data_entry in combined_data:
                for uuid, info in data_entry.items():
                    if info.get('consistency_ratio', 1) < args.cr_threshold:  # Default to 1 if not present
                        # Persist only the record that passed. Dumping the
                        # whole entry would duplicate it once per passing
                        # record and would carry along non-passing records,
                        # so a later cached run would see different CRs.
                        json.dump({uuid: info}, output_file)
                        output_file.write('\n')
                        filtered_crs.append(info['consistency_ratio'])

    # Perform a t-test on the filtered CRs
    perform_t_test(filtered_crs, hypothetical_mean=args.cr_threshold)
