import os
import os
import argparse
import subprocess
import re
import pandas as pd
from tqdm import tqdm

def list_directories(path="plp/programs"):
    """Return the names of the immediate subdirectories of ``path``.

    Args:
        path: Directory to inspect.

    Returns:
        A list of entry names (not full paths) for which
        ``os.path.isdir`` is true, in the order ``os.listdir`` yields them.
    """
    subdirectories = []
    for entry in os.listdir(path):
        candidate = os.path.join(path, entry)
        if os.path.isdir(candidate):
            subdirectories.append(entry)
    return subdirectories

def get_programs(programs_file):
    """Read program names from a text file, one name per line.

    Blank lines and lines whose stripped text starts with ``#``, ``%`` or
    ``//`` are skipped. Surrounding whitespace is removed from every name.

    Args:
        programs_file: Path of the file listing the programs.

    Returns:
        The program names in file order.
    """
    comment_prefixes = ('#', '%', '//')
    selected = []
    with open(programs_file, 'r') as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if not entry or entry.startswith(comment_prefixes):
                continue
            selected.append(entry)
    return selected

def parse_output(output):
    """Extract compilation statistics from the captured text output of a run.

    The text is scanned line by line for these markers:

    * ``Final time:``      -> first number on the line, as a float
    * ``Number of nodes:`` -> first number on the line, as an int
    * ``Number of edges:`` -> first number on the line, as an int
    * a line starting with ``s `` -> its second whitespace-separated field,
      as an int (the model count)

    A marker line whose value is missing or cannot be converted is ignored,
    so the previously seen value (or the default of zero) is kept instead of
    raising. When a marker appears several times, the last valid one wins.

    Args:
        output: Full text to parse.

    Returns:
        A dict with the keys ``compilation_time``, ``circuit_node_size``,
        ``circuit_edge_size``, ``model_count`` and ``compression_rate``.
        ``compression_rate`` is ``model_count / circuit_edge_size``; it is
        ``0`` when there are no edges and ``float("inf")`` when the quotient
        is too large to be represented as a float.
    """
    # Compiled once per call instead of re-specifying the literal per line.
    number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")

    def first_number(text, convert):
        """Return ``convert`` applied to the first number in ``text``, or None."""
        match = number_pattern.search(text)
        if match is None:
            return None
        try:
            return convert(match.group())
        except ValueError:
            # e.g. an integer field that was printed with a decimal point
            return None

    compilation_time = 0.0
    num_nodes = 0
    num_edges = 0
    model_count = 0

    for line in output.split('\n'):
        if "Final time:" in line:
            value = first_number(line, float)
            if value is not None:
                compilation_time = value
        elif "Number of nodes:" in line:
            value = first_number(line, int)
            if value is not None:
                num_nodes = value
        elif "Number of edges:" in line:
            value = first_number(line, int)
            if value is not None:
                num_edges = value
        elif line.startswith('s '):
            fields = line.split()
            if len(fields) > 1:
                try:
                    model_count = int(fields[1])
                except ValueError:
                    # Status lines such as "s SATISFIABLE" carry no count.
                    pass

    if num_edges == 0:
        compression_rate = 0
    else:
        try:
            compression_rate = model_count / num_edges
        except OverflowError:
            # Model counts are arbitrary-precision ints and may exceed the
            # float range; report the ratio as infinite rather than crash.
            compression_rate = float("inf")

    return {
        "compilation_time": compilation_time,
        "circuit_node_size": num_nodes,
        "circuit_edge_size": num_edges,
        "model_count": model_count,
        "compression_rate": compression_rate
    }

def write_broken_run(results, program_name, timeout):
    """Append a placeholder row for a run that produced no statistics.

    All size/count columns are filled with ``-1``; ``compilation_time`` is
    set to ``timeout``. ``results`` is modified in place.

    Args:
        results: Mapping from column name to the list of values so far.
        program_name: Value stored in the ``program`` column.
        timeout: Value stored in the ``compilation_time`` column.
    """
    placeholder_row = {
        "program": program_name,
        "config": "d4",
        "circuit_node_size": -1,
        "circuit_edge_size": -1,
        "model_count": -1,
        "compression_rate": -1,
        "compilation_time": timeout,
    }
    # Dict order is insertion order, so columns are filled in a fixed sequence.
    for column, value in placeholder_row.items():
        results[column].append(value)

def main():
    """Run d4 on every selected program and save the statistics as a CSV file.

    Command-line arguments:
        base_path: Directory containing one sub-directory per program; each
            is expected to hold ``<program>/<program>.cnf``.
        programs_file: Optional file listing the programs to process. When
            omitted, every sub-directory of ``base_path`` is processed.
        output_dir: Directory under which the results are written.
        --time_wall: Per-run timeout in seconds (default 1800).

    For each program, ``d4/d4 -dDNNF <cnf> -out=<nnf>`` is executed and its
    standard output is parsed with ``parse_output``. Runs that fail, cannot be
    started, or whose output cannot be parsed are recorded as placeholder
    rows with a time of ``-1``. Once one run hits the time wall, that run and
    all remaining programs are recorded as placeholder rows with the time
    wall as their time, without being executed.

    The table is written to ``<output_dir>/<subset>/d4.csv`` where ``<subset>``
    is the stem of ``programs_file`` or ``"all"`` when no file was given.
    """
    parser = argparse.ArgumentParser(description="Run d4 compilation experiments.")
    parser.add_argument("base_path", type=str, help="Path to the base directory containing program directories.")
    parser.add_argument("programs_file", nargs='?', help="File containing list of programs to process. If not provided, all programs in the base directory will be processed.")
    parser.add_argument("output_dir", type=str, help="Path to the directory where results will be saved.")
    parser.add_argument("--time_wall", type=int, default=1800, help="Maximum time (in seconds) for each execution.")
    args = parser.parse_args()

    base_path = args.base_path
    programs = get_programs(args.programs_file) if args.programs_file else list_directories(base_path)

    stat_columns = (
        "circuit_node_size",
        "circuit_edge_size",
        "model_count",
        "compression_rate",
        "compilation_time",
    )
    # Key order defines the column order of the resulting CSV.
    results = {
        "program": [],
        "config": [],
        "circuit_node_size": [],
        "circuit_edge_size": [],
        "model_count": [],
        "compression_rate": [],
        "compilation_time": []
    }

    time_wall_broken = False

    for program in tqdm(programs, desc="Programs"):
        if time_wall_broken:
            # A previous run hit the time wall: skip execution and record
            # the remaining programs as timed out.
            write_broken_run(results, program, args.time_wall)
            continue

        dir_path = os.path.join(base_path, program)
        program_path = os.path.join(dir_path, f"{program}.cnf")
        output_path = os.path.join(dir_path, f"{program}.nnf.d4")
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact and lets the timeout act on d4 itself rather
        # than on an intermediate shell process.
        command = ["d4/d4", "-dDNNF", program_path, f"-out={output_path}"]
        command_text = " ".join(command)

        try:
            completed = subprocess.run(
                command,
                text=True,
                capture_output=True,
                timeout=args.time_wall,
                check=True,
            )
        except subprocess.TimeoutExpired:
            print(f"Command timed out for {program}: {command_text}")
            time_wall_broken = True
            write_broken_run(results, program, args.time_wall)
            continue
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable d4 binary, which a
            # shell would have reported as a non-zero exit status.
            print(f"Error processing {program}: {e}")
            write_broken_run(results, program, -1)
            continue

        try:
            stats = parse_output(completed.stdout)
        except (ValueError, IndexError, OverflowError) as e:
            # Unparseable output must not discard the results gathered so far.
            print(f"Error processing {program}: {e}")
            write_broken_run(results, program, -1)
            continue

        results["program"].append(program)
        results["config"].append("d4")
        for column in stat_columns:
            results[column].append(stats[column])

    df = pd.DataFrame(results)

    subset_name = os.path.splitext(os.path.basename(args.programs_file))[0] if args.programs_file else "all"
    experiments_dir = os.path.join(args.output_dir, subset_name)
    # Creates output_dir as well, and tolerates directories that already exist.
    os.makedirs(experiments_dir, exist_ok=True)

    df.to_csv(os.path.join(experiments_dir, "d4.csv"), index=False)

# Run the experiments only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
