import os
import sys
import yaml
import argparse
import shutil
import logging
import json
from datetime import datetime

# Set up project root and import paths.
# The project root is taken to be the parent of the directory containing this
# file. It is placed at the front of sys.path (only if not already listed) so
# that the `scripts.` and `modules.` imports below can be resolved -- presumably
# needed when this file is executed directly rather than imported as a package.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import runner modules
from scripts.run_endowment_generator import main as run_endowment_generation
from scripts.run_regression_experiment import run_regression_experiment
from modules.response_converter import ResponseUtils

def setup_logger(log_path):
    """
    Configure and return the shared ``"pipeline_logger"`` logger.

    The logger is set to INFO and gets two handlers that share one
    timestamped format: a file handler writing to ``log_path`` and a console
    handler writing to ``sys.stdout``.

    The same logger object is returned on every call. Handlers attached by an
    earlier call are detached *and closed* before the new ones are added, so
    repeated calls neither duplicate output nor leak open log files.

    Args:
        log_path: Path of the log file to write to.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger("pipeline_logger")
    logger.setLevel(logging.INFO)

    # Drop handlers left over from a previous call. Iterate over a copy because
    # removeHandler() mutates logger.handlers. Closing a FileHandler releases
    # its file; closing a StreamHandler does not close the wrapped stream, so
    # sys.stdout is left untouched.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")

    # File handler
    file_handler = logging.FileHandler(log_path)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    return logger


def generate_regression_config(endowment_config, output_dir, output_path):
    """
    Build a regression config from an endowment generation config and write it as YAML.

    The survey and aggregate paths are copied from ``endowment_config["paths"]``;
    the responses/endowments CSV paths point at ``responses.csv`` and
    ``endowments.csv`` inside ``output_dir``. The ``lasso``, ``elasticnet``,
    ``split_settings`` and ``weight_assignment_model`` sections are taken from
    ``endowment_config`` when those keys exist; otherwise the defaults defined
    in this function are used. A top-level ``validation`` entry, if present,
    is used inside the default lasso/elasticnet sections.

    Args:
        endowment_config: Parsed endowment generation config. Must contain
            ``metadata["name"]`` and ``paths`` with ``survey_csv``,
            ``survey_yaml`` and ``aggregate_json``.
        output_dir: Directory in which ``responses.csv`` and
            ``endowments.csv`` are expected.
        output_path: Destination file for the regression config YAML.

    Returns:
        ``output_path``, as passed in.
    """
    metadata = endowment_config["metadata"]
    source_paths = endowment_config["paths"]

    # Locations of the files produced by the generation stage.
    generated_responses = os.path.join(output_dir, "responses.csv")
    generated_endowments = os.path.join(output_dir, "endowments.csv")

    def validation_section():
        # A fresh fallback dict is built on every call so that the lasso and
        # elasticnet defaults each get their own copy.
        return endowment_config.get("validation", {
            "strategy": "cv",
            "cv_folds": 5,
        })

    experiment_section = {
        "name": f"{metadata['name']}_weight_determination",
        "description": f"Weight determination on responses generated from: {metadata['name']}",
        "seed": metadata.get("seed", 101),
    }

    paths_section = {
        "survey_csv": source_paths["survey_csv"],
        "survey_yaml": source_paths["survey_yaml"],
        "responses_csv": generated_responses,
        "endowments_csv": generated_endowments,
        "aggregate_json": source_paths["aggregate_json"],
    }

    # Fallback model sections, used only when the endowment config does not
    # provide its own "lasso" / "elasticnet" / "split_settings" entries.
    lasso_defaults = {
        "alpha_expr": "np.logspace(-5, 1, 20)",
        "max_iter": 10000,
        "validation": validation_section(),
        "post_selection_refit": False,
    }
    elasticnet_defaults = {
        "alpha_expr": "np.logspace(-5, 1, 20)",
        "l1_ratio_expr": "np.linspace(0.1,1,10)",
        "max_iter": 10000,
        "validation": validation_section(),
        "plot_style": "2D",
        "post_selection_refit": False,
    }
    split_defaults = {
        "use_proxy_only": True,
        "train_val_split": ["train", "valid"],
        "test_split": "test",
    }

    # The report stylesheet is read from report.regression in the source config.
    report_css = endowment_config.get("report", {}).get(
        "regression", "styles/lasso_report.css"
    )

    regression_config = {
        "experiment": experiment_section,
        "paths": paths_section,
        "lasso": endowment_config.get("lasso", lasso_defaults),
        "elasticnet": endowment_config.get("elasticnet", elasticnet_defaults),
        "split_settings": endowment_config.get("split_settings", split_defaults),
        "weight_assignment_model": endowment_config.get("weight_assignment_model", "lasso"),
        "report": {"css": report_css},
    }

    with open(output_path, "w") as out_file:
        yaml.safe_dump(regression_config, out_file)

    return output_path


def run_full_pipeline(config_path):
    """
    Run endowment generation and then the regression experiment.

    A per-run directory ``outputs/<experiment name>_<timestamp>`` is created
    and receives: a copy of the input config (``original_config.yaml``), the
    run log (``pipeline.log``), per-split validity counts
    (``valid_counts_summary.json``) and the derived regression config
    (``regression_config.yaml``). The same directory is passed to the
    regression experiment.

    Args:
        config_path: Path to the endowment generation config YAML file.
    """
    # === Load original config
    with open(config_path, "r") as config_file:
        endowment_config = yaml.safe_load(config_file)

    # === Create output directory (one per run: <experiment name>_<timestamp>)
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    experiment_name = endowment_config["metadata"]["name"]
    output_dir = os.path.join("outputs", f"{experiment_name}_{run_stamp}")
    os.makedirs(output_dir, exist_ok=True)

    # === Setup logger (writes to <output_dir>/pipeline.log and stdout)
    logger = setup_logger(os.path.join(output_dir, "pipeline.log"))
    logger.info(f"[Pipeline] Output directory: {output_dir}")

    # === Copy original config; the generator below is run from this copy
    copied_config_path = os.path.join(output_dir, "original_config.yaml")
    shutil.copy(config_path, copied_config_path)

    # === Run Endowment Generator
    logger.info("[Pipeline] Running endowment generation...")
    generator, _ = run_endowment_generation(copied_config_path, return_outputs=True)
    answer_responses = generator.responses
    code_responses = generator.responses_code
    # NOTE(review): `survey` is never used below. The attribute read is kept
    # in case it has side effects -- confirm and remove if it does not.
    survey = answer_responses.survey

    # === Count valid questions / endowments per split
    logger.info("[Pipeline] Computing valid question counts after response cleaning...")

    questions_by_split = {}
    eids_by_split = {}
    summary_stats = {
        "valid_questions_by_split": questions_by_split,
        "valid_eids_by_split": eids_by_split,
        "total_questions": len(answer_responses.responses),
        "total_endowments": len(generator),
    }

    for split in ("train", "valid", "test"):
        # Only the first returned item is needed; its keys are the question
        # ids that get dropped from this split's matrix.
        problematic_answers, *_ = ResponseUtils.analyze_missing_mappings(
            code_responses=code_responses,
            answer_responses=answer_responses,
            split=split,
            verbose=False,
        )
        flagged_qids = list(problematic_answers.keys())

        split_matrix = answer_responses.get_matrix_by_split(split, dropna=True)
        # errors="ignore": flagged ids that are not in the matrix are skipped.
        cleaned_matrix = split_matrix.drop(index=flagged_qids, errors="ignore")

        valid_eids_count = ResponseUtils.count_valid_endowments(code_responses, split=split)

        kept_count = cleaned_matrix.shape[0]
        original_count = split_matrix.shape[0]
        questions_by_split[split] = kept_count
        eids_by_split[split] = valid_eids_count

        logger.info(f"[Pipeline] Valid questions in {split}: {kept_count} "
                    f"(cleaned from {original_count} original questions)")
        logger.info(f"[Pipeline] Valid endowments in {split}: {valid_eids_count}")

    # === Count valid EIDs with no split filter (split=None)
    valid_eids_all = ResponseUtils.count_valid_endowments(code_responses, split=None)
    logger.info(f"[Pipeline] Valid endowments across all splits: {valid_eids_all}")
    summary_stats["valid_eids_all"] = valid_eids_all

    # === Save summary stats to JSON
    summary_path = os.path.join(output_dir, "valid_counts_summary.json")
    with open(summary_path, "w") as summary_file:
        json.dump(summary_stats, summary_file, indent=2)

    logger.info(f"[Pipeline] Valid count summary saved to {summary_path}")

    # === Generate regression config
    regression_config_path = os.path.join(output_dir, "regression_config.yaml")
    generate_regression_config(endowment_config, output_dir, regression_config_path)
    logger.info("[Pipeline] Regression config saved.")

    # === Run regression experiment
    logger.info("[Pipeline] Running regression experiment...")
    run_regression_experiment(regression_config_path, output_dir)

    logger.info("[Pipeline] All stages complete.")


if __name__ == "__main__":
    # Command-line entry point: the config path is the only (mandatory) option.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--config",
        required=True,
        type=str,
        help="Path to endowment generation config YAML file.",
    )
    cli_args = cli.parse_args()
    run_full_pipeline(cli_args.config)