import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import argparse
import pandas as pd
import os
from pathlib import Path
import logging

from fortress.config import Config
from scripts.utils.script_helpers import find_csv_files

               
# Configure the root logger at import time: INFO level and above, with each record
# rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)

def clear_split_in_single_csv(csv_path: Path, output_path: Path, config: Config, inplace: bool):
    """
    Reset the split column of a single CSV file to the configured cleared value.

    Every row's split column is overwritten with ``config.cleared_split_value``;
    the column is created if the file does not have it yet. All other cells are
    read as plain strings with missing-value detection disabled, so they are
    written back as the same text (no "007" -> 7, "NA" -> empty cell, or
    1 -> 1.0 rewrites caused by pandas type inference).

    Errors are logged and not re-raised, so a caller looping over many files
    continues with the next one.

    Args:
        csv_path (Path): The path to the input CSV file.
        output_path (Path): The path to save the modified CSV file. Ignored when
            inplace is True (the file at csv_path is overwritten instead).
        config (Config): The application configuration; ``split_column_name`` and
            ``cleared_split_value`` are read from it.
        inplace (bool): Whether to overwrite csv_path or save to output_path.
            When saving to output_path, missing parent directories are created.
    """
    try:
        split_column = config.split_column_name
        cleared_value = config.cleared_split_value

        # dtype=str + keep_default_na=False keep every untouched cell byte-for-byte
        # as text; this tool must only change the split column.
        df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
        logger.info(f"Successfully read {csv_path}. Shape: {df.shape}")

        column_existed = split_column in df.columns
        if not column_existed:
            logger.warning(f"Split column '{split_column}' not found in {csv_path.name}. Adding it with cleared value.")

        # One assignment covers both cases: it overwrites an existing column and
        # creates a missing one.
        df[split_column] = cleared_value

        if column_existed:
            logger.info(f"Cleared split column '{split_column}' in {csv_path.name}, setting to '{cleared_value}'.")

        if inplace:
            final_output_path = csv_path
        else:
            final_output_path = output_path
            final_output_path.parent.mkdir(parents=True, exist_ok=True)

        df.to_csv(final_output_path, index=False)

        if inplace:
            logger.info(f"Overwrote {final_output_path} with cleared split column.")
        else:
            logger.info(f"Saved CSV with cleared split column to {final_output_path}")

    except FileNotFoundError:
        logger.error(f"Error: File not found at {csv_path}")
    except pd.errors.EmptyDataError:
        logger.error(f"Error: File is empty at {csv_path}")
    except Exception as e:
        # Best-effort boundary: log with traceback and let the caller move on.
        logger.exception(f"An unexpected error occurred while processing {csv_path}: {e}")


def main():
    """
    Command-line entry point: clear the split column in one or many CSV files.

    Positional ``input_path`` is handed to ``find_csv_files`` to obtain the files
    to process (presumably a list of ``Path`` objects -- verify against the helper).
    With ``--output_dir`` each modified file is saved there under its original
    file name; without it, every input file is overwritten in place.

    Because outputs are saved by file name only, two inputs sharing a name map to
    the same output file. The later one still wins, but a warning is logged so the
    overwrite is not silent.
    """
    parser = argparse.ArgumentParser(description="Clear the 'split' column in CSV file(s).")
    parser.add_argument("input_path", type=str, help="Path to a CSV file or a directory containing CSV files.")
    parser.add_argument("--output_dir", type=str, help="Directory to save copies of modified CSV files. If not provided, original files will be overwritten.")

    args = parser.parse_args()
    config = Config()

    input_path = Path(args.input_path)
    csv_files_to_process = find_csv_files(input_path)

    if not csv_files_to_process:
        logger.warning(f"No CSV files found in {input_path}.")
        return

    # The output directory does not depend on the file being processed, so it is
    # resolved and created once instead of on every loop iteration.
    output_dir = Path(args.output_dir) if args.output_dir else None
    if output_dir is not None:
        output_dir.mkdir(parents=True, exist_ok=True)

    inplace = output_dir is None
    seen_destinations = set()

    for csv_file in csv_files_to_process:
        logger.info(f"Processing {csv_file}...")

        if inplace:
            destination = csv_file
        else:
            destination = output_dir / csv_file.name
            if destination in seen_destinations:
                # Same file name from a different input location: make the
                # overwrite visible rather than losing the earlier result silently.
                logger.warning(
                    f"Output file {destination} was already written during this run "
                    f"and will be overwritten by the result for {csv_file}."
                )
            seen_destinations.add(destination)

        clear_split_in_single_csv(csv_path=csv_file,
                                  output_path=destination,
                                  config=config,
                                  inplace=inplace)
        logger.info("-" * 30)

    logger.info("CSV clearing process completed.")

# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
