from time import time
import re
from utils.insight_tree import InsightTree
import random
import string
import os
import pandas as pd
import shutil
import math


def log_operating_time(start, logger=None, return_string: bool = False):
    """
    Displays the pipeline runtime or generates a string with information about the pipeline's operating time

    :param start: pipeline start time, in seconds, on the same clock as ``time.time()``
    :param logger: logger for outputting information (any object with an ``info`` method);
                   when it is None the message is printed to stdout instead
    :param return_string: Is it necessary to return the string? (bool)
    :return: None | string with information about the pipeline's operating time
    """
    # Work on whole seconds so that both values are rendered as integers
    # ("2 minutes", not "2.0 minutes").
    elapsed_seconds = int(time() - start)
    minutes, seconds = divmod(elapsed_seconds, 60)
    operating_time = f"⌚️ Pipeline operating time: {minutes} minutes {seconds} seconds"
    if return_string:
        return operating_time
    if logger is None:
        # The default arguments must not crash: without a logger fall back to stdout.
        print(operating_time)
    else:
        logger.info(operating_time)


def collect_sort_nodes_into_phases(node_indexes_on_the_phase: list,
                                   insight_tree: InsightTree, is_higher_better: bool) -> list:
    """
    Sort ideas by phase and average score

    Starting from the given level, repeatedly collects the distinct parents of the current
    level, orders them by ``mean_score`` and appends them as the next level. Climbing stops
    as soon as a node without a parent is met on the current level (no further level is
    added in that case) or when the current level is empty.

    :param node_indexes_on_the_phase: indexes of the last stage (modeling),
                                      from which all available nodes can be recursively collected
    :param insight_tree: tree of nodes
    :param is_higher_better: sorting mode (True - best score is the highest one)
    :return: indexes of all nodes at all stages (two-dimensional array); the first element is
             the input list itself, each following element is the sorted parent level
    """
    all_idea_indexes = [node_indexes_on_the_phase]
    current_level = node_indexes_on_the_phase

    # An empty level has no parents to climb to; without this condition the loop
    # below would keep appending empty levels forever.
    while current_level:
        parent_node_indexes = []
        for node_index in current_level:
            parent = insight_tree.nodes[node_index].parent
            if parent is None:
                # We have reached the top of the tree: the partially collected
                # parent level is discarded and the result is complete.
                return all_idea_indexes
            if parent.index not in parent_node_indexes:
                parent_node_indexes.append(parent.index)

        # list.sort is stable for both directions, so ties keep their discovery order.
        parent_node_indexes.sort(
            key=lambda index: insight_tree.nodes[index].mean_score,
            reverse=bool(is_higher_better),
        )
        all_idea_indexes.append(parent_node_indexes)
        current_level = parent_node_indexes

    return all_idea_indexes


def replace_dirs(text: str, data_dir: str, result_dir: str, submission_subdir: str = None) -> str:
    """
    Rewrites directory assignments inside a piece of source text.

    Every line that starts with ``SOURCE_DIR = ...``, ``DATA_DIR = ...`` or ``RESULT_DIR = ...``
    followed by a quoted (optionally raw) string gets its value replaced by a raw string holding
    the normalized new path. SOURCE_DIR and DATA_DIR both receive ``data_dir``; RESULT_DIR
    receives ``result_dir``.

    When ``submission_subdir`` is given, an assignment of the form
    ``submission_path = os.path.join(DATA_DIR, "...", ...)`` is rewritten to
    ``submission_path = os.path.join(RESULT_DIR, "<submission_subdir>")``.

    :param text: Input text containing lines like:
                 SOURCE_DIR = r"..."
                 DATA_DIR   = r"..."
                 RESULT_DIR = r"..."
                 submission_path = os.path.join(DATA_DIR, "submissions", "file.csv")
    :param data_dir: New path for SOURCE_DIR and DATA_DIR.
    :param result_dir: New path for RESULT_DIR.
    :param submission_subdir: Name placed after RESULT_DIR in the rewritten submission_path;
                              the submission_path line is left alone when this is empty.
    :return: Updated text with substituted values.
    """
    assignment_re = re.compile(
        r"^(?P<var>(?:SOURCE_DIR|DATA_DIR|RESULT_DIR)\s*=\s*)r?[\"'].*?[\"']", re.MULTILINE
    )

    def _swap_path(found):
        # Keep the original "NAME = " prefix (including its spacing) untouched.
        declaration = found.group('var')
        uses_data_dir = declaration.strip().startswith(('SOURCE_DIR', 'DATA_DIR'))
        target = data_dir if uses_data_dir else result_dir
        return f'{declaration}r"{os.path.normpath(target)}"'

    # A callable replacement is used so that backslashes in paths are inserted literally.
    updated = assignment_re.sub(_swap_path, text)

    if not submission_subdir:
        return updated

    submission_re = re.compile(
        r"^(\s*submission_path\s*=\s*os\.path\.join\()DATA_DIR,\s*\".*?\".*?\)", re.MULTILINE
    )
    return submission_re.sub(
        lambda found: f'{found.group(1)}RESULT_DIR, "{submission_subdir}")',
        updated,
    )


def generate_interim_report(node_indexes_on_test: dict, start: float, passage_number: int,
                            insight_tree: InsightTree, save_path: str):
    """
    Build a Markdown report for the current run and write it to ``Report_<passage_number>.md``.

    The report consists of:
    a) Pipeline runtime
    b) Run number
    c) Test score information for every tested node
    d) A dump of the tree nodes at the time of this run

    :param node_indexes_on_test: a dictionary in which the keys are selected nodes for running the test,
                                 and the values are dictionaries with the keys ``test_score`` and
                                 ``file_with_code``
    :param start: Pipeline start time
    :param passage_number: passage number
    :param insight_tree: tree of nodes
    :param save_path: the directory in which the generated report is saved
    :return: None
    """
    template = (
        "# Report \n"
        "## Operating time:\n {operating_time}\n"
        "## Passage:\n Passage number {passage_number} \n"
        "## Score on test data:\n {test_score} \n"
        "## The tree at the moment:\n {tree}"
    )
    operating_time = log_operating_time(start, return_string=True)

    # --- Test score section: one entry per tested (modeling) node -----------------
    score_chunks = []
    for tested_index in node_indexes_on_test:
        branch = insight_tree.get_branch(tested_index)
        score_chunks.append("Node indexes:\n\n")
        score_chunks.append(f"\t FE: {insight_tree.nodes[tested_index].parent.index}\n\n")
        score_chunks.append(f"Idea:\n\n{branch[0]['idea']}\n\n")
        score_chunks.append(f"\t Modeling: {tested_index}\n\n")
        score_chunks.append(f"Idea:\n\n{branch[1]['idea']}\n\n")

        outcome = node_indexes_on_test[tested_index]
        score_chunks.append(f"Test score: {outcome['test_score']}\n\n")
        score_chunks.append(f"File with code: {outcome['file_with_code']}\n\n")
    test_score = "".join(score_chunks)

    # --- Split the nodes into the two report sections by their depth ---------------
    fe_node_indexes = []
    modeling_node_indexes = []
    for candidate in insight_tree.nodes:
        try:
            if not candidate:
                # Falsy indexes (e.g. 0) are not listed in either section.
                continue
            depth = insight_tree.nodes[candidate].depth
            if depth == 1:
                fe_node_indexes.append(candidate)
            elif depth == 0:
                modeling_node_indexes.append(candidate)
        except Exception as err:
            # A broken node must not prevent the report from being written.
            print(f"Error in the from report: {err}")

    divider = "-" * 84

    def _render_nodes(indexes):
        # Each node is preceded by a divider; single newlines are doubled so that
        # Markdown renders every line of the node dump as its own paragraph.
        return "".join(
            "\n\n" + divider + "\n\n" + str(insight_tree.nodes[index]).replace("\n", "\n\n")
            for index in indexes
        )

    tree = (
        "### Data preparation and feature engineering"
        + _render_nodes(fe_node_indexes)
        + "\n### Modeling"
        + _render_nodes(modeling_node_indexes)
    )

    rendered = template.format(
        operating_time=operating_time,
        passage_number=passage_number,
        test_score=test_score,
        tree=tree,
    )
    report_file_path = os.path.join(save_path, f"Report_{passage_number}.md")
    with open(report_file_path, "w", encoding="utf-8") as file:
        file.write(rendered)


def unique_filename_generator(length=10):
    """
    Endless generator of random names that never repeats a value it has already produced.

    Names are built from ASCII letters and digits. Uniqueness is tracked per generator
    instance only: two separate generators may yield the same name.

    :param length: Number of characters in every generated name (default is 10).
    :yield: A random string of the specified length not yielded before by this generator.
    """
    alphabet = string.ascii_letters + string.digits
    issued = set()
    while True:
        candidate = ''.join(random.choices(alphabet, k=length))
        if candidate in issued:
            # Collision with an earlier name: draw again.
            continue
        issued.add(candidate)
        yield candidate


def write_running_time(start_work, log_file, agent_name):
    """
    Append the running time of an agent to the ``times.csv`` table.

    The table is read from ``<log_file>/times.csv`` (the file must already exist), one row
    is added to its end and the whole table is written back to the same path.

    :param start_work: moment the agent started working, on the same clock as ``time.time()``
    :param log_file: directory that contains ``times.csv``
    :param agent_name: value stored in the ``agent`` column
    :return: None
    """
    elapsed = round(time() - start_work)
    csv_path = os.path.join(log_file, "times.csv")

    history = pd.read_csv(csv_path)
    entry = pd.DataFrame([{
        'agent': agent_name,
        'running_time_in_seconds': elapsed,
        # Whole minutes only; the remainder is visible in the seconds column.
        "running_time_in_minutes": elapsed // 60,
    }])
    pd.concat([history, entry], ignore_index=True).to_csv(csv_path, index=False)


def move_file_to_log_dir(competition_dir: str, log_dir: str):
    """
    Copy the top-level entries of `competition_dir` into `log_dir`, skipping competition data and hidden entries.

    Despite the name nothing is removed from `competition_dir`: files are copied with
    `shutil.copy2`, directories with `shutil.copytree` (merging into an existing directory).

    Parameters:
    :param competition_dir: Directory path whose entries are copied.
    :param log_dir: Directory path that receives the copies.

    Notes:
    - "train.csv", "test.csv", "overview.txt", "count_metric.py", "test_labeled.csv" and
      "sample_submission.csv" are never copied.
    - Entries whose name starts with '.' are skipped as well.
    - An error while copying one entry is printed and does not stop the remaining entries.
    """
    skipped_names = (
        "train.csv", "test.csv", "overview.txt",
        "count_metric.py", "test_labeled.csv",
        "sample_submission.csv",
    )

    for entry in os.listdir(competition_dir):
        if entry.startswith(".") or entry in skipped_names:
            continue
        try:
            origin = os.path.join(competition_dir, entry)
            target = os.path.join(log_dir, entry)

            if os.path.isfile(origin):
                shutil.copy2(origin, target)
            elif os.path.isdir(origin):
                shutil.copytree(origin, target, dirs_exist_ok=True)
        except Exception as e:
            print(f"Error copying {entry}: {e}")


def calculate_softmax_weights(mean_scores: list, is_higher_better: bool) -> list:
    """
    Turn a list of scores into softmax weights.

    :param mean_scores: scores to convert
    :param is_higher_better: when False the scores are negated first, so that the lowest
                             score receives the largest weight
    :return: list of weights in the same order as ``mean_scores``; an empty list for empty input
    """
    if not mean_scores:
        return []

    oriented = mean_scores if is_higher_better else [-value for value in mean_scores]

    # Shifting by the maximum keeps every exponent <= 0, so math.exp cannot overflow.
    peak = max(oriented)
    exponentials = [math.exp(value - peak) for value in oriented]
    normaliser = sum(exponentials)

    # Defensive guard: with a vanishing denominator fall back to uniform weights.
    if normaliser < 1e-12:
        count = len(oriented)
        return [1 / count] * count

    return [term / normaliser for term in exponentials]


def move_all_files(source_folder, destination_folder):
    """
    Moves all regular files from a source folder to a destination folder.

    Only the top level of ``source_folder`` is processed; sub-directories are left in place.
    The destination folder is created when it does not exist. Moving is best-effort: a file
    that cannot be moved is reported on stdout and the remaining files are still processed.

    :param source_folder: folder whose files are moved
    :param destination_folder: folder that receives the files
    :raises FileNotFoundError: if ``source_folder`` does not exist
    :return: None
    """
    if not os.path.exists(source_folder):
        raise FileNotFoundError(f"Error: Source folder '{source_folder}' does not exist.")

    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)

    for filename in os.listdir(source_folder):
        source_path = os.path.join(source_folder, filename)
        if not os.path.isfile(source_path):
            continue

        destination_path = os.path.join(destination_folder, filename)
        try:
            shutil.move(source_path, destination_path)
        except OSError as err:
            # OSError also covers shutil.Error. Only file-system failures are tolerated;
            # KeyboardInterrupt/SystemExit and programming errors are no longer swallowed.
            print(f"Error moving {filename}: {err}")
