# Imports: project-local helpers (gen_synth, run_method, utils, configs), third-party libraries, and the standard library
from gen_synth import *
from run_method import run_method_single_subgroup, run_method_multiple_times
import numpy as np
from sklearn.preprocessing import StandardScaler
import argparse
import torch
from syflow import syflow, And_Finder
from causalml.inference.tree import CausalTreeRegressor
import pysubgroup as ps
import pandas as pd
from utils import *
import timeit
from configs import *
from econml.grf import CausalForest
from sklearn.metrics import f1_score
import os

def _str_to_bool(value):
    """Convert a command-line string into a boolean.

    ``argparse`` with ``type=bool`` applies ``bool()`` to the raw string, so
    every non-empty value (including ``"False"``) evaluates to ``True``. This
    converter interprets the usual textual spellings instead.

    Args:
        value: The raw argument string (an actual ``bool`` is passed through).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    # The empty string stays falsy: under the previous ``type=bool`` parsing it
    # was the only value that disabled the flag, so existing invocations that
    # rely on it keep working.
    if normalized in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"Expected a boolean value (true/false, yes/no, 1/0), got {value!r}."
    )


def parse_args():
    """Parse the command-line arguments for the runtime experiment.

    Only ``--outpath`` is required. The remaining options configure the
    synthetic data generators and default to fixed values, because the
    experiment varies the dimensionality rather than these parameters.

    Returns:
        argparse.Namespace: The parsed arguments (read from ``sys.argv``).
    """
    parser = argparse.ArgumentParser(description="Run synthetic data experiments to test runtime vs. dimensionality for all methods.")
    # There is no --method argument: the script loops through all methods internally.
    parser.add_argument("--outpath", type=str, required=True, help="Output path for the CSV results file.")
    # Default values for other parameters, as the focus is on scaling 'd'
    parser.add_argument("--setting", type=str, default='observational', help="Setting for the experiment.")
    parser.add_argument("--n", type=int, default=2000, help="Number of samples.")
    parser.add_argument("--tau", type=float, default=4.0, help="Treatment effect within the subgroup.")
    parser.add_argument("--gamma", type=float, default=1.0, help="Treatment effect outside the subgroup.")
    parser.add_argument("--c", type=float, default=2.0, help="Confounding effect within the subgroup.")
    parser.add_argument("--sigma", type=float, default=0.5, help="Standard deviation of the noise in Y.")
    parser.add_argument("--sg_size", type=float, default=0.25, help="Size of the subgroup.")
    parser.add_argument("--rule_size", type=int, default=2, help="Number of features in the rule.")
    parser.add_argument("--n_subgroups", type=int, default=3, help="Number of subgroups to generate.")
    # A dedicated converter is used instead of type=bool so that
    # "--mean_shift False" actually yields False.
    parser.add_argument("--mean_shift", type=_str_to_bool, default=True, help="Whether to apply mean shift in treatment assignment.")
    return parser.parse_args()

def _generate_data(args, d, seed):
    """Generate one synthetic dataset for the configured setting.

    Args:
        args: Parsed command-line arguments holding the generator parameters.
        d: Number of feature dimensions to generate.
        seed: Seed forwarded to the generator.

    Returns:
        Whatever the selected generator returns; the caller reads the
        ``"X"``, ``"A"`` and ``"Y"`` entries from it.

    Raises:
        ValueError: If ``args.setting`` is not a known setting.
    """
    gen_kwargs = dict(
        n=args.n, d=d, tau=args.tau, gamma=args.gamma, c=args.c,
        sigma=args.sigma, sg_size=args.sg_size, rule_size=args.rule_size,
        seed=seed, mean_shift=args.mean_shift,
    )
    # Only the generator for the selected setting is looked up, so a missing
    # generator for another setting does not raise NameError here.
    if args.setting == "observational":
        return gen_observational_trial(**gen_kwargs)
    if args.setting == "demographic":
        return gen_demographic_data(**gen_kwargs)
    if args.setting == "interventional":
        return gen_interventional_trial(**gen_kwargs)
    raise ValueError(f"Unknown setting: {args.setting}. Please choose a valid setting.")


def _fit_method(method, args, X_scaled, A, Y_scaled, scaler_X, scaler_Y, feature_names, is_discrete):
    """Load the configuration for ``method`` and run it once on the data.

    The return values of the underlying methods are discarded; this helper
    exists so the caller can time the call as a whole.

    Args:
        method: Name of the method to run.
        args: Parsed command-line arguments (``setting`` and ``n_subgroups`` are read).
        X_scaled: Standardized feature matrix.
        A: Treatment indicator, used to split the rows and as a fit input.
        Y_scaled: Standardized outcome vector.
        scaler_X: Fitted feature scaler, forwarded to the "subcon" method.
        scaler_Y: Fitted outcome scaler, forwarded to the "subcon" method.
        feature_names: Column names for the features.
        is_discrete: Per-feature flags forwarded to the "subcon" method.

    Raises:
        ValueError: If ``method`` is not recognized.
    """
    n_rows = X_scaled.shape[0]

    if method == "subcon":
        our_config = Subcon_Config().get_setting_config(args.setting)
        run_method_multiple_times(X_scaled, X_scaled[A == 0], X_scaled[A == 1], Y_scaled, Y_scaled[A == 0], Y_scaled[A == 1], scaler_X, scaler_Y, feature_names, is_discrete, our_config, discrete_target=False, maximize=True, plot=False, max_reps=1)

    elif method == "syflow":
        config = Syflow_Config().get_setting_config(args.setting)
        # The treatment indicator is appended as an extra input column.
        X_cat = np.concatenate([X_scaled, A.reshape(-1, 1)], axis=1)
        X_cat = torch.tensor(X_cat, dtype=torch.float64)
        limits = get_data_limits(X_cat)
        model = And_Finder(limits)
        Y_tensor = torch.tensor(Y_scaled[:, None], dtype=torch.float64)
        syflow(X_cat, Y_tensor, model, progressbar=False, alpha=config["alpha"], lr_classifier=config["lr_classifier"], subgroup_train_epochs=config["subgroup_train_epochs"])

    elif method == "pysubgroup":
        X_df = pd.DataFrame(X_scaled, columns=feature_names)
        Y_df = pd.DataFrame(Y_scaled, columns=["Y"])
        data_df = pd.concat([X_df, Y_df], axis=1)
        config = PySubgroup_Config().get_setting_config(args.setting)
        target = ps.NumericTarget("Y")
        search_space = ps.create_selectors(data_df, ignore=["Y"], nbins=config["n_bins"], intervals_only=False)
        task = ps.SubgroupDiscoveryTask(data_df, target, search_space, result_set_size=args.n_subgroups, qf=ps.StandardQFNumeric(config["alpha"]))
        ps.BeamSearch(beam_width=config["beam_width"]).execute(task)

    elif method == "causaltree":
        config = CausalTree_Config().get_setting_config(args.setting)
        # The configured leaf size is a fraction of the sample count.
        min_support = int(config["min_samples_leaf"] * n_rows)
        model = CausalTreeRegressor(min_samples_leaf=min_support, max_depth=config["max_depth"])
        model.fit(X_scaled, treatment=A, y=Y_scaled)

    elif method == "honesttree":
        config = HonestTree_Config().get_setting_config(args.setting)
        min_support = int(config["min_samples_leaf"] * n_rows)
        # A forest with a single estimator is used as the honest tree.
        model = CausalForest(n_estimators=1, max_samples=1., honest=True, min_samples_leaf=min_support, max_depth=config["max_depth"], subforest_size=1, inference=False)
        model.fit(X_scaled, A, Y_scaled)

    else:
        raise ValueError(f"Method '{method}' is not recognized.")


def _save_results(results_df, outpath):
    """Write the results table to ``outpath`` as CSV, creating its directory."""
    output_dir = os.path.dirname(outpath)
    if output_dir:
        # exist_ok avoids the race between checking for and creating the directory.
        os.makedirs(output_dir, exist_ok=True)
    results_df.to_csv(outpath, index=False)


def run_analysis():
    """Run the runtime-vs-dimensionality analysis for all methods.

    For every method and every tested dimension, a synthetic dataset is
    generated and standardized, the method is run once, and the wall-clock
    time of that run (including loading the method's configuration) is
    recorded. All measurements are written to the CSV file given by
    ``--outpath``. Combinations that fail are reported on stdout and skipped.

    Raises:
        ValueError: If the ``--setting`` argument is not a known setting.
    """
    args = parse_args()

    # Define the methods and dimensions to test
    methods_to_test = ["subcon", "syflow", "pysubgroup", "causaltree", "honesttree"]
    dimensions_to_test = [5, 10, 20, 40, 60, 80, 100]

    # One record per successfully timed (method, dimension) pair
    results = []

    print("Starting runtime analysis for all methods.")
    print(f"Testing methods: {methods_to_test}")
    print(f"Testing dimensions: {dimensions_to_test}")

    # A single seed is sufficient as we are primarily interested in runtime, not statistical variability of metrics
    seed = 0

    for method in methods_to_test:
        print(f"\n===== Starting analysis for method: {method} =====")

        for d in dimensions_to_test:
            print(f"\n--- Running for dimension d = {d} ---")

            # --- 1. Data Generation ---
            try:
                data = _generate_data(args, d, seed)
            except NameError as e:
                print(f"Error generating data: {e}. Make sure gen_synth.py and its functions are accessible.")
                continue  # Skip to the next dimension or method

            # data["s_star"] is not read: only the runtime is recorded here.
            X, A, Y = data["X"], data["A"], data["Y"]

            # --- 2. Data Preprocessing ---
            is_discrete = [False] * X.shape[1]
            scaler_X = StandardScaler()
            X_scaled = scaler_X.fit_transform(X)
            scaler_Y = StandardScaler()
            Y_scaled = scaler_Y.fit_transform(Y.reshape(-1, 1)).flatten()
            feature_names = [f"X{i}" for i in range(X.shape[1])]

            # --- 3. Run Method and Measure Time ---
            # Data generation and scaling above are deliberately outside the timed region.
            start_time = timeit.default_timer()
            try:
                _fit_method(method, args, X_scaled, A, Y_scaled, scaler_X, scaler_Y, feature_names, is_discrete)
            except Exception as e:
                # Best effort: one failing combination must not abort the whole sweep.
                print(f"An error occurred while running method '{method}' for d={d}: {e}")
                continue  # Skip to the next iteration
            runtime = timeit.default_timer() - start_time

            print(f"Method '{method}' with dimension d = {d} finished in {runtime:.4f} seconds.")

            # Store the result for this method and dimension
            results.append({'method': method, 'dimension': d, 'runtime': runtime, 'n_samples': args.n})

    # --- 4. Save Results ---
    if not results:
        print("\nNo results were generated. The output file will not be created.")
        return

    results_df = pd.DataFrame(results)
    _save_results(results_df, args.outpath)

    print(f"\nAnalysis complete. All results saved to {args.outpath}")
    print(results_df)

# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run_analysis()
