                                                       

import csv
import logging
import uuid              
from typing import List, Optional

from pydantic import ValidationError

from fortress.common.data_models import InputPromptRecord
from fortress.common.constants import SPLIT_DATABASE, SPLIT_BENCHMARK               

logger = logging.getLogger(__name__)

def load_prompts_from_csv(file_path: str) -> List[InputPromptRecord]:
    """
    Read a CSV file and build a list of InputPromptRecord objects from its rows.

    The header must contain the columns ``original_prompt``, ``label``,
    ``split`` and ``source_file``; otherwise an error is logged and an empty
    list is returned. ``prompt_category`` and ``prompt_style`` are optional
    and are passed as ``None`` when absent or blank.

    A data row is skipped (with a log message) when:
      * ``original_prompt`` or ``source_file`` is empty or missing,
      * ``label`` is empty, not an integer, or not 0/1,
      * ``split`` is something other than 'database', 'benchmark' or '',
      * InputPromptRecord raises a ValidationError for the row.

    Short rows (fewer cells than headers) are handled per row: their missing
    cells are treated as empty strings instead of aborting the whole file.

    Args:
        file_path: The path to the CSV file. A leading UTF-8 BOM is tolerated.

    Returns:
        A list of InputPromptRecord objects for the rows that passed
        validation. No exception is propagated: if the file is not found or
        cannot be read, the error is logged and whatever was loaded up to
        that point (possibly nothing) is returned.
    """
    prompts: List[InputPromptRecord] = []
    required_columns = {'original_prompt', 'label', 'split', 'source_file'}

    try:
        # newline='' is what the csv module requires so that newlines embedded
        # in quoted fields are parsed correctly; 'utf-8-sig' strips a BOM if
        # present (otherwise it would be glued onto the first header name)
        # and decodes plain UTF-8 unchanged.
        with open(file_path, mode='r', encoding='utf-8-sig', newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            missing = required_columns - set(reader.fieldnames or [])
            if missing:
                logger.error(f"CSV file {file_path} is missing required columns: {missing}. Skipping file.")
                return prompts

            # Row 1 is the header, so the first data row is reported as row 2.
            # DictReader silently skips blank lines, so this is the record
            # index rather than an exact physical line number.
            for row_num_for_logging, row in enumerate(reader, start=2):
                original_prompt = _cell_text(row, 'original_prompt')
                if not original_prompt:
                    logger.warning(f"Missing original_prompt in row: {row_num_for_logging} in {file_path}. Skipping row.")
                    continue

                # The header check above guarantees the 'label' key exists in
                # every row, so only the value itself needs validating.
                label_str = _cell_text(row, 'label')
                if not label_str:
                    logger.warning(f"Empty label string in row: {row_num_for_logging} in {file_path}. Skipping row.")
                    continue
                try:
                    label = int(label_str)
                except ValueError:
                    logger.warning(f"Invalid label format '{row.get('label')}' (not an integer) in row: {row_num_for_logging} in {file_path}. Skipping row.")
                    continue
                if label not in (0, 1):
                    logger.warning(f"Invalid label value '{label}' in row: {row_num_for_logging} in {file_path}. Expected 0 or 1. Skipping row.")
                    continue

                # An empty split is deliberately let through here; whether it
                # is acceptable is left to InputPromptRecord's own validation.
                split = _cell_text(row, 'split')
                if split not in ('database', 'benchmark', ''):
                    logger.warning(f"Invalid split value '{split}' in row: {row_num_for_logging} in {file_path}. Skipping row.")
                    continue

                source_file = _cell_text(row, 'source_file')
                if not source_file:
                    logger.warning(f"Missing source_file in row: {row_num_for_logging} in {file_path}. Skipping row.")
                    continue

                # Optional columns: blank or absent values become None.
                prompt_category = _cell_text(row, 'prompt_category') or None
                prompt_style = _cell_text(row, 'prompt_style') or None

                try:
                    prompt_record = InputPromptRecord(
                        original_prompt=original_prompt,
                        label=label,
                        split=split,
                        source_file=source_file,
                        prompt_category=prompt_category,
                        prompt_style=prompt_style,
                    )
                except ValidationError as e:
                    logger.warning(f"Validation error creating InputPromptRecord for row {row_num_for_logging} in {file_path}: {e}. Skipping row.")
                    continue
                prompts.append(prompt_record)

    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
    except Exception as e:
        # Top-level boundary: log with traceback and fall through so the
        # caller still receives the rows loaded before the failure.
        logger.error(f"Failed to read or process CSV file {file_path}: {e}", exc_info=True)
    else:
        # Only report success when the file was actually read to the end.
        logger.info(f"Successfully loaded {len(prompts)} prompts from {file_path}")

    return prompts


def _cell_text(row: dict, column: str) -> str:
    """Return the stripped text of ``row[column]``, or '' if the cell is absent or None.

    csv.DictReader fills the cells of a short row with None (its default
    ``restval``), so the value cannot be assumed to be a string.
    """
    value = row.get(column)
    return value.strip() if value else ''

