import yaml
from logger_config import setup_logger
from time import time
from utils.utils import log_operating_time, generate_interim_report
import datetime
from pipeline import Pipeline
import pandas as pd
from dotenv import load_dotenv
import itertools
import random
from utils.utils import write_running_time
import os

def collect_api_keys():
    """Return the non-empty API keys found in the environment.

    Reads ``API_KEY_COUNT`` (default ``0``) and then ``API_KEY_1`` through
    ``API_KEY_<count>``; unset or empty entries are skipped.
    """
    key_count = int(os.getenv("API_KEY_COUNT", "0"))
    candidates = (os.getenv(f"API_KEY_{i}") for i in range(1, key_count + 1))
    return [key for key in candidates if key]


def find_missing_files(directory, required_files):
    """Return the names from *required_files* that do not exist in *directory*."""
    return [
        name for name in required_files
        if not os.path.exists(os.path.join(directory, name))
    ]


def time_limit_reached(start, limit_minutes):
    """Return True once more than *limit_minutes* have passed since *start*.

    *start* is a ``time()`` timestamp in seconds.
    """
    return time() - start > limit_minutes * 60


def main():
    """Run the pipeline for the competition named in ``config.yaml``.

    Steps:
      1. Load ``config.yaml`` and the environment (API keys, URL).
      2. Validate the competition directory and create the log layout.
      3. Initialise the pipeline (restarting on error until the time budget
         ``config['time_run_minutes']`` is spent) and run passages.
      4. Finish the pipeline.

    Raises:
        RuntimeError: if no API key is available in the environment.
        FileNotFoundError: if the competition directory or one of its
            required files is missing.
        SystemExit: with status 1 if the pipeline could not be initialised
            within the time budget.
    """
    # Step 1. Load config and env
    with open('config.yaml', 'r', encoding='utf-8') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)
    load_dotenv()
    # os.environ only accepts strings, while YAML parses a bare ``0`` as int.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config["CUDA_VISIBLE_DEVICES"])

    api_keys = collect_api_keys()
    if not api_keys:
        # Without this guard next(itertools.cycle([])) fails with a bare
        # StopIteration that says nothing about the cause.
        raise RuntimeError(
            "No API keys found: set API_KEY_COUNT and API_KEY_1..API_KEY_<n> "
            "in the environment"
        )

    config['URL'] = os.getenv("URL")
    random.shuffle(api_keys)
    config['api_keys'] = itertools.cycle(api_keys)
    config['api_key'] = next(config['api_keys'])

    # Step 2. Check competition and settings logs
    competition_files = [
        "overview.txt", "sample_submission.csv",
        "train.csv", "test.csv"
    ]
    path_competition = os.path.join(os.getcwd(), 'competition', config["competition"])
    if not os.path.exists(path_competition):
        raise FileNotFoundError(f"Competition {path_competition} not found!")
    missing_files = find_missing_files(path_competition, competition_files)
    if missing_files:
        raise FileNotFoundError(
            f"The following files are not found in the competition: {missing_files}!"
        )

    current_date = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    path_log = os.path.join(os.getcwd(), config["log_dir"],
                            f"{config['competition'].replace(' ', '_')}_{current_date}")
    path_debug_log = os.path.join(path_log, 'debug')
    config['path_log'] = path_log
    config['path_debug_log'] = path_debug_log
    config['data_dir_path'] = path_competition
    config['save_path'] = os.path.join(path_log, "code", "data")

    # exist_ok=True already makes directory creation idempotent.
    os.makedirs(path_log, exist_ok=True)
    os.makedirs(path_debug_log, exist_ok=True)
    sub_directories = [
        ('code',), ('images',), ('code', 'data'), ('submissions',), ("processed_data",),
        ('results',), ('eda_descriptions',)
    ]
    for parts in sub_directories:
        os.makedirs(os.path.join(path_log, *parts), exist_ok=True)

    # Create .csv files to save statistics
    pd.DataFrame({
        'agent': [], 'input_tokens': [], 'output_tokens': []}
    ).to_csv(os.path.join(path_debug_log, "tokens.csv"), index=False)
    pd.DataFrame({
        'agent': [], 'running_time_in_seconds': [], "running_time_in_minutes": []}
    ).to_csv(os.path.join(path_debug_log, "times.csv"), index=False)

    log_file = os.path.join(path_log, f'{config["competition"]}.log')
    debug_file = os.path.join(path_debug_log, f'{config["competition"]}_debug.log')
    main_logger, sub_logger, debug_logger = setup_logger(log_file, debug_file)

    start_log_text = f'Start on {config["competition"]}\nModel {config["model_name"]}\n'
    start_log_text += f"Number of API keys: {len(api_keys)}\n"
    main_logger.info(start_log_text)
    # Keep the active API key out of the debug log.
    debug_logger.info({
        name: ('***' if name == 'api_key' else value)
        for name, value in config.items()
    })

    # Step 3. Start Pipeline
    start = time()
    pipe = Pipeline(config, start, main_logger, sub_logger, debug_logger)

    def out_of_time():
        """Log and return True when the configured run time has elapsed."""
        if time_limit_reached(start, config['time_run_minutes']):
            main_logger.info(
                f"The time set for execution ({config['time_run_minutes']} minutes) has elapsed"
            )
            return True
        return False

    def write_interim_report(passage):
        """Call pipe.reflect() for *passage* and pass its result to generate_interim_report()."""
        node_indexes_on_test, pre_path_to_save_results_of_passage = pipe.reflect(passage)
        generate_interim_report(
            node_indexes_on_test=node_indexes_on_test,
            passage_number=passage,
            start=start,
            save_path=pre_path_to_save_results_of_passage,
            insight_tree=pipe.insighter.insight_tree
        )

    # Retry initialisation until it succeeds or the time budget is spent.
    while not time_limit_reached(start, config['time_run_minutes']):
        pipe.start()
        if pipe.state != 'error':
            break
        write_running_time(start, path_debug_log, "init_tree")
        main_logger.error("Restart")
    else:
        main_logger.error("Pipeline work time is over")
        # Initialisation never succeeded, so report failure to the caller.
        raise SystemExit(1)

    write_running_time(start, path_debug_log, "init_tree")
    passage_number = 1
    try:
        while True:
            log_operating_time(start, main_logger)
            pipe.forward(passage_number)
            pipe.generate_report(passage_number)

            if pipe.state == 'complete':
                # Tree initialization is complete
                if config['dynamic_model']:
                    config['model_name'] = 'gemini-2.5-flash'

                # Backprop
                pipe.backprop()
                if pipe.state == "error":
                    break
                log_operating_time(start, main_logger)
                main_logger.info("🟢 Backprop complete!")
                pipe.generate_report(passage_number)
                if out_of_time():
                    break

                # Adding EDA
                pipe.adding_eda(passage_number)
                pipe.generate_report(passage_number)
                log_operating_time(start, main_logger)
                main_logger.info("🟢 Adding eda complete!")
                if out_of_time():
                    break

                # Adding
                pipe.adding(passage_number)
                pipe.generate_report(passage_number)
                log_operating_time(start, main_logger)
                main_logger.info("🟢 Adding complete!")
                if out_of_time():
                    break

                # Merging
                pipe.merging(passage_number)
                pipe.generate_report(passage_number)
                main_logger.info("🟢 Merging complete!")

                write_interim_report(passage_number)
                if out_of_time():
                    break
                passage_number += 1
                main_logger.info(f"➖➖➖Passage number {passage_number}➖➖➖")
                pipe.state = 'AddingEDA'

            elif pipe.state == 'CompletePassage':
                write_interim_report(passage_number)
                main_logger.info("🟢 Complete Passage!")
                passage_number += 1
                main_logger.info(f"➖➖➖Passage number {passage_number}➖➖➖")
                pipe.state = 'AddingEDA'

            elif pipe.state == 'error':
                pipe.finish()
                main_logger.info("There was an error :(")
                break

            elif pipe.state == 'finish':
                pipe.finish()
                break

            if out_of_time():
                break

    except Exception as err:
        # Top-level boundary: record the traceback, then still run step 4.
        main_logger.exception(f"Error during pipeline execution: {err}")

    # Step 4. Generating a report
    log_operating_time(start, main_logger)
    main_logger.info("Finish")
    # NOTE(review): the 'error' and 'finish' branches above already call
    # pipe.finish(), so it runs twice on those paths — confirm that
    # finish() is safe to repeat.
    pipe.finish()


if __name__ == '__main__':
    main()
