from typing import Dict, List, Optional
import json
import random
from collections import defaultdict
import torch
from .configs import TrainConfig, ApibenchDataConfig, MLLMDataConfig
from datasets import Dataset
from .retrieval_replay import PromptReplayBuffer, ExperienceIndex, generate_example_id

# Maps a retriever name (the value compared against ``config.retriever``) to
# the key of the raw data entry from which that retriever's reference text is
# read when building a prompt.
dict_retriever = {
    "bm25": "bm25_retrieved_info",
    "sentence_transformer": "sentence_transformer_retrieved_info",
    "splade": "splade_retrieved_info", 
    "flagembedding": "flagembedding_retrieved_info",
}

# Zero-shot system prompt: the model must answer with a bare model name.
# It refers to "###Instruction" / "###Input" sections, which are presumably
# part of each entry's instruction text (they are not built in this file).
# NOTE(review): not referenced by the conversion functions visible in this
# file, which select the few-shot variant instead - confirm external usage.
gorilla_prompt = (
    "You are Gorilla, an expert API model router. "
    "Read the ###Instruction and ###Input below and return ONLY a single model name. "
    "Do not invent model name. Do not return anything else.\n\n"
)

# Few-shot system prompt: the model must answer with a bare model name for the
# "[ORIGINAL PROMPT]" section and must not copy models from the optional
# "[RELATED EXAMPLES]" section. This is the fallback chosen by the conversion
# functions in this file when no custom prompt, explanation format, or date
# cutoff is configured.
gorilla_fewshot_prompt = (
    "You are Gorilla, an expert API model router. "
    "Read the [ORIGINAL PROMPT] section below and return ONLY a single model name for that prompt. "
    "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - "
    "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. "
    "Do not invent model name. Do not return anything else.\n\n"
)

def create_gorilla_prompt_with_date(date: str) -> str:
    """Build the zero-shot router prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    role_and_task = (
        "You are Gorilla, an expert API model router. "
        "Read the ###Instruction and ###Input below and return ONLY a single model name. "
    )
    output_rules = "Do not invent model name. Do not return anything else. "
    cutoff_clause = f"Choose only models with a model date before {date}.\n\n"
    return role_and_task + output_rules + cutoff_clause

def create_gorilla_fewshot_prompt_with_date(date: str) -> str:
    """Build the few-shot router prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    segments = [
        "You are Gorilla, an expert API model router. ",
        "Read the [ORIGINAL PROMPT] section below and return ONLY a single model name for that prompt. ",
        "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - ",
        "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. ",
        # The date restriction sits before the final output rules in this variant.
        f"Choose only models with a model date before {date}. ",
        "Do not invent model name. Do not return anything else.\n\n",
    ]
    return "".join(segments)

# Zero-shot system prompt that requests a JSON object with exactly the fields
# 'model_name' and 'explanation', and explicitly forbids markdown code fences
# or any text around the object.
# NOTE(review): not referenced by the conversion functions visible in this
# file, which select the few-shot variant instead - confirm external usage.
gorilla_prompt_explanation_json = (
    "You are Gorilla, an expert API model router. "
    "Read the ###Instruction and ###Input below and answer with a model name and a brief explanation. "
    "Return the answer in JSON format with fields 'model_name' and 'explanation'. "
    "For example: {\"model_name\": \"actual model name\", \"explanation\": \"actual explanation\"}. "
    "Do not include any other fields. Do not invent model names. Do not return anything else. "
    "Important: DO NOT wrap the JSON in triple backticks or any markdown/code fence (for example, do NOT return ```json ... ```). "
    "Return only the JSON object string starting with '{' and ending with '}', with no surrounding markdown, backticks, or extra text. "
    "Do not prepend or append any characters outside the JSON (no headings, no explanatory text).\n\n"
)

# Few-shot system prompt that requests a JSON object with exactly the fields
# 'model_name' and 'explanation' for the "[ORIGINAL PROMPT]" section, and
# forbids markdown code fences or any text around the object. Selected by the
# conversion functions in this file when ``config.system_prompt_format`` is
# "gorilla_prompt_explanation_json" and no date cutoff is available.
gorilla_fewshot_prompt_explanation_json = (
    "You are Gorilla, an expert API model router. "
    "Read the [ORIGINAL PROMPT] section below and answer with a model name and a brief explanation for that prompt. "
    "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - "
    "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. "
    "Return the answer in JSON format with fields 'model_name' and 'explanation'. "
    "For example: {\"model_name\": \"actual model name\", \"explanation\": \"actual explanation\"}. "
    "Do not include any other fields. Do not invent model names. Do not return anything else. "
    "Important: DO NOT wrap the JSON in triple backticks or any markdown/code fence (for example, do NOT return ```json ... ```). "
    "Return only the JSON object string starting with '{' and ending with '}', with no surrounding markdown, backticks, or extra text. "
    "Do not prepend or append any characters outside the JSON (no headings, no explanatory text).\n\n"
)

def create_gorilla_prompt_explanation_json_with_date(date: str) -> str:
    """Build the zero-shot JSON-answer prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    cutoff_clause = f"Choose only models with a model date before {date}. "
    segments = [
        "You are Gorilla, an expert API model router. ",
        "Read the ###Instruction and ###Input below and answer with a model name and a brief explanation. ",
        "Return the answer in JSON format with fields 'model_name' and 'explanation'. ",
        'For example: {"model_name": "actual model name", "explanation": "actual explanation"}. ',
        "Do not include any other fields. Do not invent model names. Do not return anything else. ",
        cutoff_clause,
        "Important: DO NOT wrap the JSON in triple backticks or any markdown/code fence (for example, do NOT return ```json ... ```). ",
        "Return only the JSON object string starting with '{' and ending with '}', with no surrounding markdown, backticks, or extra text. ",
        "Do not prepend or append any characters outside the JSON (no headings, no explanatory text).\n\n",
    ]
    return "".join(segments)

def create_gorilla_fewshot_prompt_explanation_json_with_date(date: str) -> str:
    """Build the few-shot JSON-answer prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    cutoff_clause = f"Choose only models with a model date before {date}. "
    segments = [
        "You are Gorilla, an expert API model router. ",
        "Read the [ORIGINAL PROMPT] section below and answer with a model name and a brief explanation for that prompt. ",
        "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - ",
        "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. ",
        "Return the answer in JSON format with fields 'model_name' and 'explanation'. ",
        'For example: {"model_name": "actual model name", "explanation": "actual explanation"}. ',
        "Do not include any other fields. Do not invent model names. Do not return anything else. ",
        cutoff_clause,
        "Important: DO NOT wrap the JSON in triple backticks or any markdown/code fence (for example, do NOT return ```json ... ```). ",
        "Return only the JSON object string starting with '{' and ending with '}', with no surrounding markdown, backticks, or extra text. ",
        "Do not prepend or append any characters outside the JSON (no headings, no explanatory text).\n\n",
    ]
    return "".join(segments)

# Zero-shot system prompt that requests the answer in the tagged form
# "<<<model_name>>>:... <<<explanation>>>:...", the same layout produced by
# create_gorilla_explanation_answer_template.
# NOTE(review): not referenced by the conversion functions visible in this
# file, which select the few-shot variant instead - confirm external usage.
gorilla_prompt_explanation = (
    "You are Gorilla, an expert API model router. "
    "Read the ###Instruction and ###Input below and answer with a model name and a brief explanation. "
    "Return the answer in the format: <<<model_name>>>:actual model name <<<explanation>>>:actual explanation. "
    "Do not include any other fields. Do not invent model names. Do not return anything else.\n\n"
)

# Few-shot system prompt that requests the answer for the "[ORIGINAL PROMPT]"
# section in the tagged form "<<<model_name>>>:... <<<explanation>>>:...".
# Selected by the conversion functions in this file when
# ``config.system_prompt_format`` is "gorilla_prompt_explanation" and no date
# cutoff is available.
gorilla_fewshot_prompt_explanation = (
    "You are Gorilla, an expert API model router. "
    "Read the [ORIGINAL PROMPT] section below and answer with a model name and a brief explanation for that prompt. "
    "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - "
    "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. "
    "Return the answer in the format: <<<model_name>>>:actual model name <<<explanation>>>:actual explanation. "
    "Do not include any other fields. Do not invent model names. Do not return anything else.\n\n"
)

def create_gorilla_prompt_explanation_with_date(date: str) -> str:
    """Build the zero-shot tagged-answer prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    instructions = (
        "You are Gorilla, an expert API model router. "
        "Read the ###Instruction and ###Input below and answer with a model name and a brief explanation. "
        "Return the answer in the format: <<<model_name>>>:actual model name <<<explanation>>>:actual explanation. "
        "Do not include any other fields. Do not invent model names. Do not return anything else. "
    )
    cutoff_clause = f"Choose only models with a model date before {date}.\n\n"
    return instructions + cutoff_clause

def create_gorilla_fewshot_prompt_explanation_with_date(date: str) -> str:
    """Build the few-shot tagged-answer prompt limited to models dated before ``date``.

    Args:
        date: Cutoff date; it is interpolated verbatim into the prompt text.

    Returns:
        The system prompt, ending with a blank line.
    """
    instructions = (
        "You are Gorilla, an expert API model router. "
        "Read the [ORIGINAL PROMPT] section below and answer with a model name and a brief explanation for that prompt. "
        "If [RELATED EXAMPLES] are provided, they are for reference only to help you understand similar cases - "
        "do NOT return the models from those examples. Return a model name only for the [ORIGINAL PROMPT]. "
        "Return the answer in the format: <<<model_name>>>:actual model name <<<explanation>>>:actual explanation. "
        "Do not include any other fields. Do not invent model names. Do not return anything else. "
    )
    cutoff_clause = f"Choose only models with a model date before {date}.\n\n"
    return instructions + cutoff_clause

def create_json_answer_template(model_name: str, explanation: str) -> str:
    """Serialize an answer as a JSON object string.

    Args:
        model_name: Value stored under the "model_name" key.
        explanation: Value stored under the "explanation" key.

    Returns:
        The JSON text with "model_name" first and "explanation" second.
    """
    answer_fields = {}
    answer_fields["model_name"] = model_name
    answer_fields["explanation"] = explanation
    return json.dumps(answer_fields)

def create_gorilla_explanation_answer_template(model_name: str, explanation: str) -> str:
    """Format an answer in the tagged Gorilla layout.

    Args:
        model_name: Text placed after the ``<<<model_name>>>:`` tag.
        explanation: Text placed after the ``<<<explanation>>>:`` tag.

    Returns:
        ``<<<model_name>>>:{model_name} <<<explanation>>>:{explanation}``.
    """
    name_part = "<<<model_name>>>:" + f"{model_name}"
    explanation_part = "<<<explanation>>>:" + f"{explanation}"
    return name_part + " " + explanation_part
    

def load_dataset_json(path: str) -> list:
    """Read a JSON Lines file into a list of decoded records.

    Args:
        path: Path of a UTF-8 text file holding one JSON document per line.

    Returns:
        The decoded objects in file order.

    Raises:
        ValueError: If any line is empty or whitespace-only.
    """
    records = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            # Blank lines are rejected rather than skipped.
            if not raw_line.strip():
                raise ValueError("Empty line found in the dataset file.")
            records.append(json.loads(raw_line))
    return records

def convert_to_conversational(
    raw_data: List[Dict[str, str]],
    config: TrainConfig,
    tokenizer,
    dataset_config: Optional[object] = None,
    is_replay: bool = False,
) -> Dataset:
    """Convert raw entries into a prompt/completion ``Dataset``.

    Each output row has the keys ``prompt``, ``completion``, ``is_replay``,
    ``model_name`` and ``domain``. The prompt is
    ``system_prompt + instruction + optional reference text + "\\n###Response:"``
    and the completion is ``" " + answer + tokenizer.eos_token``.

    Args:
        raw_data: Entries read via ``instruction``, ``model_name``,
            ``explanation``, ``domain`` and, when a retriever is configured,
            the retriever's info key from ``dict_retriever``.
        config: Supplies ``system_prompt``, ``system_prompt_format`` and
            ``retriever``.
        tokenizer: Object exposing ``eos_token``.
        dataset_config: Optional object; its ``model_date_cutoff`` attribute,
            when present and not ``None``, adds a date restriction to the
            default system prompt.
        is_replay: Flag copied into every output row.

    Returns:
        A ``Dataset`` built from the converted rows.

    Raises:
        ValueError: If an entry has an empty or missing ``instruction`` or
            ``model_name``.
    """
    def _normalise(value) -> str:
        # Treat a missing or null field as empty so the ValueError below is
        # raised instead of an AttributeError on ``None``.
        return (value or "").replace('\r\n', '\n').strip()

    # A custom system prompt wins; otherwise pick a default (dated if possible).
    if config.system_prompt != "":
        system_prompt = config.system_prompt
    else:
        model_date_cutoff = (
            dataset_config.model_date_cutoff
            if dataset_config and hasattr(dataset_config, 'model_date_cutoff')
            else None
        )
        use_date = model_date_cutoff is not None

        # NOTE(review): the few-shot prompt variants are selected here even
        # though this function never emits an "[ORIGINAL PROMPT]" section.
        # The zero-shot variants defined in this module may have been
        # intended - confirm before changing, since it alters training data.
        if config.system_prompt_format == "gorilla_prompt_explanation_json":
            system_prompt = create_gorilla_fewshot_prompt_explanation_json_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt_explanation_json
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            system_prompt = create_gorilla_fewshot_prompt_explanation_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt_explanation
        else:
            system_prompt = create_gorilla_fewshot_prompt_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt

    # Resolve the retriever once: the lookup does not depend on the entry, so
    # an invalid name is reported a single time instead of once per entry.
    retrieved_info_key = None
    if config.retriever:
        retrieved_info_key = dict_retriever.get(config.retriever)
        if retrieved_info_key is None:
            print(
                f"Retriever '{config.retriever}' is not valid. Choose from: {list(dict_retriever.keys())}")

    conversational_dataset = []
    for entry in raw_data:
        prompt = _normalise(entry.get("instruction"))
        model_name = _normalise(entry.get("model_name"))
        if not prompt or not model_name:
            raise ValueError("Both 'instruction' and 'model_name' must be present in each data entry.")

        explanation = _normalise(entry.get("explanation"))
        if len(explanation) > 1000:
            # Only a few explanations exceed 1000 chars (usually bad data);
            # truncating keeps inputs short and saves memory.
            explanation = explanation[:1000] + "..."

        if config.system_prompt_format == "gorilla_prompt_explanation_json":
            answer = create_json_answer_template(model_name, explanation)
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            answer = create_gorilla_explanation_answer_template(model_name, explanation)
        else:
            answer = model_name

        # Optional reference text attached by the configured retriever.
        model_card = ""
        if retrieved_info_key is not None:
            retrieved_info = entry.get(retrieved_info_key, "")
            if retrieved_info:
                model_card = "\n<Reference API>: " + retrieved_info.replace('\r\n', '\n').strip()

        # Build the full prompt without extra stripping.
        full_prompt = system_prompt + prompt + model_card + "\n###Response:"

        conversational_dataset.append({
            "prompt": full_prompt,
            "completion": " " + answer + tokenizer.eos_token,  # Keep consistent leading space
            "is_replay": is_replay,  # Flag for neighbor-contrastive loss
            "model_name": model_name,  # For contrastive loss model_id extraction
            "domain": entry.get("domain", ""),  # Domain for filtering
        })

    return Dataset.from_list(conversational_dataset)


def truncate_text_by_tokens(text: str, max_tokens: int) -> str:
    """Truncate ``text`` to roughly ``max_tokens`` tokens.

    Uses the approximation 1 token ≈ 4 characters. When the text is too long
    it is cut at the character budget, trimmed back to the last space inside
    that window (if any), and suffixed with ``"..."``.

    Args:
        text: Text to shorten; ``None`` or an empty string yields ``""``.
        max_tokens: Approximate token budget. A budget of zero or less yields
            ``""`` so callers can disable the text entirely.

    Returns:
        The original text if it fits, otherwise the truncated text plus
        ``"..."``; ``""`` for empty input or a non-positive budget.
    """
    # A non-positive budget used to yield "..." (or slice from the end for
    # negative values); nothing should be emitted in that case.
    if not text or max_tokens <= 0:
        return ""
    max_chars = max_tokens * 4
    if len(text) <= max_chars:
        return text
    # Cut at a word boundary when the window contains a space.
    truncated = text[:max_chars].rsplit(' ', 1)[0]
    return truncated + "..."


def convert_to_retrieval_replay_fewshot(
    raw_data: List[Dict[str, str]], 
    config: TrainConfig, 
    tokenizer,
    dataset_config: Optional[object] = None,
    experience_idx: int = 0,
    experience_name: str = "",
    replay_buffer: Optional[PromptReplayBuffer] = None,
    experience_index: Optional[ExperienceIndex] = None
) -> Dataset:
    """
    Build a prompt/completion dataset whose prompts are augmented with retrieved few-shot examples.

    Every usable entry becomes an "[ORIGINAL PROMPT]" section, optionally followed by a
    "[RELATED EXAMPLES]" section filled from ``experience_index.retrieve``. With probability
    ``config.fewshot_dropout_prob`` the examples are dropped for an entry. Entries with an
    empty instruction or model name are skipped. The first three processed entries are
    printed for debugging.

    Args:
        raw_data: List of raw data entries
        config: TrainConfig; reads retriever, system_prompt, system_prompt_format,
            fewshot_top_k, fewshot_max_card_tokens, fewshot_dropout_prob and seed
        tokenizer: Object exposing ``eos_token``
        dataset_config: Optional dataset config; ``model_date_cutoff`` is read when present
        experience_idx: Index of current experience (0-based); not used in this function body
        experience_name: Name of current experience
        replay_buffer: Optional replay buffer; its ``get_examples()`` feeds a newly built index
        experience_index: Optional pre-built experience index (if None, one is built here)

    Returns:
        Dataset with the keys "prompt" and "completion".

    Raises:
        ValueError: If ``config.retriever`` is not set.
    """
    if not config.retriever:
        raise ValueError("retriever must be specified for retrieval_replay_fewshot baseline")

    # Custom system prompt takes precedence over the built-in few-shot prompts.
    if config.system_prompt != "":
        system_prompt = config.system_prompt
    else:
        has_cutoff_attr = dataset_config and hasattr(dataset_config, 'model_date_cutoff')
        model_date_cutoff = dataset_config.model_date_cutoff if has_cutoff_attr else None

        if model_date_cutoff is None:
            if config.system_prompt_format == "gorilla_prompt_explanation_json":
                system_prompt = gorilla_fewshot_prompt_explanation_json
            elif config.system_prompt_format == "gorilla_prompt_explanation":
                system_prompt = gorilla_fewshot_prompt_explanation
            else:
                system_prompt = gorilla_fewshot_prompt
        elif config.system_prompt_format == "gorilla_prompt_explanation_json":
            system_prompt = create_gorilla_fewshot_prompt_explanation_json_with_date(model_date_cutoff)
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            system_prompt = create_gorilla_fewshot_prompt_explanation_with_date(model_date_cutoff)
        else:
            system_prompt = create_gorilla_fewshot_prompt_with_date(model_date_cutoff)

    # Build the retrieval index from current + replayed examples when none was supplied.
    if experience_index is None:
        experience_index = ExperienceIndex(
            retriever_type=config.retriever,
            current_examples=raw_data,
            replay_examples=replay_buffer.get_examples() if replay_buffer else [],
            experience_name=experience_name,
            device="cuda" if torch.cuda.is_available() else "cpu"
        )

    top_k = config.fewshot_top_k
    max_card_tokens = config.fewshot_max_card_tokens
    dropout_prob = config.fewshot_dropout_prob

    # Seeds the module-level RNG so the dropout draws below are reproducible.
    if config.seed is not None:
        random.seed(config.seed)

    rows = []
    debug_limit = 3
    debug_shown = 0
    rule = '=' * 80

    for local_idx, entry in enumerate(raw_data):
        prompt = entry.get("instruction", "").replace('\r\n', '\n').strip()
        model_name = entry.get("model_name", "").replace('\r\n', '\n').strip()
        if not (prompt and model_name):
            continue

        # The example's own id is excluded from retrieval (self-masking).
        example_id = entry.get("example_id") or generate_example_id(
            prompt, model_name, experience_name, local_idx
        )

        # One RNG draw per usable entry decides whether examples are attached.
        use_fewshot = random.random() > dropout_prob

        retrieved_neighbors = None
        section_parts = []
        if use_fewshot:
            retrieved_neighbors = experience_index.retrieve(
                query=prompt,
                top_k=top_k,
                exclude_example_ids=[example_id]
            )
            if retrieved_neighbors:
                section_parts.append("\n\n[RELATED EXAMPLES]\n")
                for rank, neighbor in enumerate(retrieved_neighbors, 1):
                    neighbor_prompt = neighbor['prompt']
                    neighbor_model = neighbor['model_id']
                    neighbor_domain = neighbor.get('domain', '')
                    neighbor_card = truncate_text_by_tokens(
                        neighbor.get('model_card_snippet', ''),
                        max_card_tokens
                    )

                    section_parts.append(f"Example {rank}:\n")
                    section_parts.append(f"  Prompt: {neighbor_prompt}\n")
                    section_parts.append(f"  Reference model (for similar case): {neighbor_model}\n")
                    if neighbor_domain:
                        section_parts.append(f"  Domain: {neighbor_domain}\n")
                    if neighbor_card:
                        section_parts.append(f"  Model card: {neighbor_card}\n")
                    section_parts.append("\n")

        augmented_prompt = f"[ORIGINAL PROMPT]\n{prompt}" + "".join(section_parts)
        full_prompt = system_prompt + augmented_prompt + "\n###Response:"

        # Expected answer, shaped by the configured output format.
        explanation = entry.get("explanation", "").replace('\r\n', '\n').strip()
        if len(explanation) > 1000:
            explanation = explanation[:1000] + "..."

        if config.system_prompt_format == "gorilla_prompt_explanation_json":
            answer = create_json_answer_template(model_name, explanation)
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            answer = create_gorilla_explanation_answer_template(model_name, explanation)
        else:
            answer = model_name

        # Debug dump of the first few processed entries.
        if debug_shown < debug_limit:
            print("\n" + rule)
            print(f"EXAMPLE {debug_shown + 1} - Training Example Index: {local_idx}")
            print(rule)
            print("\n[ORIGINAL PROMPT]:")
            print(prompt[:200] + ('...' if len(prompt) > 200 else ''))
            print(f"\n[EXPECTED MODEL]: {model_name}")
            print(f"[DOMAIN]: {entry.get('domain', 'N/A')}")

            if not use_fewshot:
                print(f"\n[FEW-SHOT DROPOUT]: Using original prompt without examples (dropout_prob={dropout_prob})")
            elif not retrieved_neighbors:
                print("\n[RETRIEVED SIMILAR PROMPTS]: None found")
            else:
                print(f"\n[RETRIEVED SIMILAR PROMPTS] ({len(retrieved_neighbors)} examples):")
                for rank, neighbor in enumerate(retrieved_neighbors, 1):
                    print(f"\n  Neighbor {rank}:")
                    print(f"    Prompt: {neighbor['prompt'][:150]}{'...' if len(neighbor['prompt']) > 150 else ''}")
                    print(f"    Selected Model: {neighbor['model_id']}")
                    print(f"    Domain: {neighbor.get('domain', 'N/A')}")
                    if neighbor.get('model_card_snippet'):
                        card_preview = neighbor['model_card_snippet'][:100]
                        print(f"    Model Card: {card_preview}{'...' if len(neighbor['model_card_snippet']) > 100 else ''}")

            print("\n[FULL TRAINING PROMPT] (first 800 chars):")
            print(f"{full_prompt[:800]}...")
            print(f"\n[EXPECTED COMPLETION]: {answer[:100]}{'...' if len(answer) > 100 else ''}")
            print(rule + "\n")
            debug_shown += 1

        rows.append({
            "prompt": full_prompt,
            "completion": " " + answer + tokenizer.eos_token
        })

    return Dataset.from_list(rows)


def _build_domain_to_models_mapping(raw_data: List[Dict[str, str]]) -> Dict[str, List[str]]:
    """Build a mapping from domain to list of model names in that domain."""
    domain_to_models = defaultdict(set)
    for entry in raw_data:
        domain = entry.get("domain", "unknown")
        model_name = entry.get("model_name", "")
        if model_name:
            domain_to_models[domain].add(model_name)
    # Convert sets to lists for easier random sampling
    return {domain: list(models) for domain, models in domain_to_models.items()}


def _get_all_models(raw_data: List[Dict[str, str]]) -> List[str]:
    """Get all unique model names from the dataset."""
    models = set()
    for entry in raw_data:
        model_name = entry.get("model_name", "")
        if model_name:
            models.add(model_name)
    return list(models)


def _sample_rejected_model(
    entry: Dict[str, str],
    domain_to_models: Dict[str, List[str]],
    all_models: List[str],
    strategy: str,
    random_seed: Optional[int] = None
) -> Optional[str]:
    """
    Sample a rejected (incorrect) model for the given entry based on the strategy.
    
    Args:
        entry: The data entry with instruction, model_name, domain, etc.
        domain_to_models: Mapping from domain to list of models
        all_models: List of all models in the dataset
        strategy: One of "same_domain", "cross_domain", "random", or "mixed"
        random_seed: Optional random seed for reproducibility
    
    Returns:
        A rejected model name, or None if no valid rejection can be found
    """
    if random_seed is not None:
        random.seed(random_seed)
    
    correct_model = entry.get("model_name", "")
    domain = entry.get("domain", "unknown")
    
    if not correct_model:
        return None
    
    if strategy == "same_domain":
        # Sample from same domain but different model
        domain_models = domain_to_models.get(domain, [])
        # Filter out the correct model
        candidate_models = [m for m in domain_models if m != correct_model]
        if candidate_models:
            return random.choice(candidate_models)
        # Fallback to random if no other models in domain
        candidate_models = [m for m in all_models if m != correct_model]
        return random.choice(candidate_models) if candidate_models else None
    
    elif strategy == "cross_domain":
        # Sample from different domain
        other_domains = [d for d in domain_to_models.keys() if d != domain]
        if other_domains:
            selected_domain = random.choice(other_domains)
            domain_models = domain_to_models[selected_domain]
            # Still filter out correct model in case it appears in multiple domains
            candidate_models = [m for m in domain_models if m != correct_model]
            if candidate_models:
                return random.choice(candidate_models)
        # Fallback to random
        candidate_models = [m for m in all_models if m != correct_model]
        return random.choice(candidate_models) if candidate_models else None
    
    elif strategy == "random":
        # Random model from entire dataset
        candidate_models = [m for m in all_models if m != correct_model]
        return random.choice(candidate_models) if candidate_models else None
    
    elif strategy == "mixed":
        # Randomly choose between same_domain and cross_domain strategies
        # This gives a balanced mix of both types of negative samples
        chosen_strategy = random.choice(["same_domain", "cross_domain"])
        if chosen_strategy == "same_domain":
            # Sample from same domain but different model
            domain_models = domain_to_models.get(domain, [])
            candidate_models = [m for m in domain_models if m != correct_model]
            if candidate_models:
                return random.choice(candidate_models)
        else:  # cross_domain
            # Sample from different domain
            other_domains = [d for d in domain_to_models.keys() if d != domain]
            if other_domains:
                selected_domain = random.choice(other_domains)
                domain_models = domain_to_models[selected_domain]
                candidate_models = [m for m in domain_models if m != correct_model]
                if candidate_models:
                    return random.choice(candidate_models)
        # Fallback to random if chosen strategy doesn't yield results
        candidate_models = [m for m in all_models if m != correct_model]
        return random.choice(candidate_models) if candidate_models else None
    
    else:
        raise ValueError(f"Unknown negative sampling strategy: {strategy}. "
                        f"Choose from: 'same_domain', 'cross_domain', 'random', 'mixed'")


def convert_to_preference_dataset(
    raw_data: List[Dict[str, str]], 
    config: TrainConfig, 
    tokenizer,
    dataset_config: Optional[object] = None,
    negative_sampling_strategy: str = "same_domain",
    num_rejections_per_example: int = 1,
    random_seed: Optional[int] = None
) -> Dataset:
    """
    Convert raw dataset to preference optimization format (DPO/RLHF style).

    For every usable entry the correct model forms the "chosen" response and
    up to ``num_rejections_per_example`` sampled incorrect models form the
    "rejected" responses. Entries with an empty or missing instruction or
    model name are skipped, as are rejections for which no other model exists.

    Args:
        raw_data: List of raw data entries with instruction, model_name, domain, etc.
        config: TrainConfig; reads system_prompt, system_prompt_format and retriever
        tokenizer: Object exposing ``eos_token``
        dataset_config: Optional dataset config; ``model_date_cutoff`` is read when present
        negative_sampling_strategy: Strategy for sampling rejected models
            - "same_domain": Sample from same domain (default, recommended)
            - "cross_domain": Sample from different domain
            - "random": Random sample from all models
            - "mixed": Randomly choose between same_domain and cross_domain strategies
        num_rejections_per_example: Number of rejected responses per example (default: 1)
        random_seed: Optional base seed; each (entry, rejection) pair gets its own
            derived seed

    Returns:
        Dataset with format: {"prompt": str, "chosen": str, "rejected": str}

    Raises:
        ValueError: Propagated from the sampler for an unknown strategy.
    """
    def _normalise(value) -> str:
        # Treat a missing or null field as empty so the entry is skipped
        # instead of raising AttributeError on ``None``.
        return (value or "").replace('\r\n', '\n').strip()

    # Pools used for negative sampling.
    domain_to_models = _build_domain_to_models_mapping(raw_data)
    all_models = _get_all_models(raw_data)

    # A custom system prompt wins; otherwise pick a default (dated if possible).
    if config.system_prompt != "":
        system_prompt = config.system_prompt
    else:
        model_date_cutoff = (
            dataset_config.model_date_cutoff
            if dataset_config and hasattr(dataset_config, 'model_date_cutoff')
            else None
        )
        use_date = model_date_cutoff is not None

        # NOTE(review): the few-shot prompt variants are selected here even
        # though this function never emits an "[ORIGINAL PROMPT]" section.
        # The zero-shot variants defined in this module may have been
        # intended - confirm before changing, since it alters training data.
        if config.system_prompt_format == "gorilla_prompt_explanation_json":
            system_prompt = create_gorilla_fewshot_prompt_explanation_json_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt_explanation_json
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            system_prompt = create_gorilla_fewshot_prompt_explanation_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt_explanation
        else:
            system_prompt = create_gorilla_fewshot_prompt_with_date(model_date_cutoff) if use_date else gorilla_fewshot_prompt

    # Resolve the retriever once: the lookup does not depend on the entry, so
    # an invalid name is reported a single time instead of once per entry.
    retrieved_info_key = None
    if config.retriever:
        retrieved_info_key = dict_retriever.get(config.retriever)
        if retrieved_info_key is None:
            print(
                f"Retriever '{config.retriever}' is not valid. Choose from: {list(dict_retriever.keys())}")

    preference_dataset = []

    for entry_idx, entry in enumerate(raw_data):
        prompt = _normalise(entry.get("instruction"))
        correct_model_name = _normalise(entry.get("model_name"))

        if not prompt or not correct_model_name:
            continue

        explanation = _normalise(entry.get("explanation"))
        if len(explanation) > 1000:
            explanation = explanation[:1000] + "..."

        # Format the chosen (correct) response.
        if config.system_prompt_format == "gorilla_prompt_explanation_json":
            chosen_response = create_json_answer_template(correct_model_name, explanation)
        elif config.system_prompt_format == "gorilla_prompt_explanation":
            chosen_response = create_gorilla_explanation_answer_template(correct_model_name, explanation)
        else:
            chosen_response = correct_model_name

        # Optional reference text attached by the configured retriever.
        model_card = ""
        if retrieved_info_key is not None:
            retrieved_info = entry.get(retrieved_info_key, "")
            if retrieved_info:
                model_card = "\n<Reference API>: " + retrieved_info.replace('\r\n', '\n').strip()

        full_prompt = system_prompt + prompt + model_card + "\n###Response:"

        for rejection_idx in range(num_rejections_per_example):
            # Derive a distinct seed per (entry, rejection) pair for reproducibility.
            rejection_seed = (random_seed + entry_idx * num_rejections_per_example + rejection_idx) if random_seed is not None else None

            rejected_model = _sample_rejected_model(
                entry,
                domain_to_models,
                all_models,
                negative_sampling_strategy,
                rejection_seed
            )

            if rejected_model is None:
                # No other model is available to reject.
                continue

            # Rejected responses carry an empty explanation.
            if config.system_prompt_format == "gorilla_prompt_explanation_json":
                rejected_response = create_json_answer_template(rejected_model, "")
            elif config.system_prompt_format == "gorilla_prompt_explanation":
                rejected_response = create_gorilla_explanation_answer_template(rejected_model, "")
            else:
                rejected_response = rejected_model

            preference_dataset.append({
                "prompt": full_prompt,
                "chosen": " " + chosen_response + tokenizer.eos_token,
                "rejected": " " + rejected_response + tokenizer.eos_token
            })

    return Dataset.from_list(preference_dataset)