#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include "../tokenizer/bpe_util.h"

// Test configuration
// TEST_SOCKET_ID is the first argument of every dpdk_bpe_init() call in this
// file (presumably a NUMA socket id -- TODO confirm against bpe_util.h).
#define TEST_SOCKET_ID 0
// TEST_MAX_TOKENS sizes the tokens/token_ids output buffers declared by the
// tests and is the token limit handed to dpdk_bpe_tokenize().
#define TEST_MAX_TOKENS 100

// Global test counters
// Updated by the TEST_ASSERT* macros, reported by print_test_summary(), and
// main() exits non-zero when tests_failed is positive.
static int tests_run = 0;
static int tests_passed = 0;
static int tests_failed = 0;

// Test helper macros
/*
 * TEST_ASSERT(condition, message)
 *
 * Records one test: bumps tests_run, then either tests_passed or
 * tests_failed depending on whether `condition` is non-zero, and prints a
 * "PASS: ..." / "FAIL: ..." line carrying `message`.
 * The `break` only leaves the macro's own do/while wrapper.
 */
#define TEST_ASSERT(condition, message) \
    do { \
        tests_run += 1; \
        if (!(condition)) { \
            tests_failed += 1; \
            printf("FAIL: %s\n", message); \
            break; \
        } \
        tests_passed += 1; \
        printf("PASS: %s\n", message); \
    } while (0)

/*
 * TEST_ASSERT_EQ(actual, expected, message)
 *
 * Records one test comparing two integer values. Each operand is evaluated
 * exactly once (so expressions with side effects are safe to pass) and is
 * widened to long long before the comparison and before printing, which
 * keeps the printf format valid for any integer operand type. Values that
 * do not fit in long long are outside the supported range.
 *
 * Bumps tests_run, then tests_passed or tests_failed, and prints a
 * "PASS: ..." / "FAIL: ..." line with the expected and actual values.
 */
#define TEST_ASSERT_EQ(actual, expected, message) \
    do { \
        tests_run++; \
        const long long test_eq_actual_ = (long long)(actual); \
        const long long test_eq_expected_ = (long long)(expected); \
        if (test_eq_actual_ == test_eq_expected_) { \
            tests_passed++; \
            printf("PASS: %s (expected: %lld, actual: %lld)\n", \
                   (message), test_eq_expected_, test_eq_actual_); \
        } else { \
            tests_failed++; \
            printf("FAIL: %s (expected: %lld, actual: %lld)\n", \
                   (message), test_eq_expected_, test_eq_actual_); \
        } \
    } while (0)

/*
 * TEST_ASSERT_STR_EQ(actual, expected, message)
 *
 * Records one test comparing two NUL-terminated strings with strcmp().
 * Each operand is evaluated exactly once. A NULL operand is never passed to
 * strcmp() or to a %s conversion: it counts as a failure and is printed as
 * "(null)".
 *
 * Bumps tests_run, then tests_passed or tests_failed, and prints a
 * "PASS: ..." / "FAIL: ..." line (the latter with both strings).
 */
#define TEST_ASSERT_STR_EQ(actual, expected, message) \
    do { \
        tests_run++; \
        const char *test_str_actual_ = (actual); \
        const char *test_str_expected_ = (expected); \
        if (test_str_actual_ != NULL && test_str_expected_ != NULL && \
            strcmp(test_str_actual_, test_str_expected_) == 0) { \
            tests_passed++; \
            printf("PASS: %s\n", (message)); \
        } else { \
            tests_failed++; \
            printf("FAIL: %s (expected: '%s', actual: '%s')\n", (message), \
                   test_str_expected_ != NULL ? test_str_expected_ : "(null)", \
                   test_str_actual_ != NULL ? test_str_actual_ : "(null)"); \
        } \
    } while (0)

/**
 * Exercise the init -> query -> cleanup lifecycle of the BPE model.
 *
 * Expects initialization to return 0, a fresh model to report a positive
 * vocabulary size and zero merge rules, and a vocabulary-size query issued
 * after cleanup to return -1.
 */
void test_bpe_init_cleanup(void) {
    printf("\n=== Testing BPE Initialization and Cleanup ===\n");

    // Bring the model up
    const int init_status = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(init_status, 0, "BPE initialization should succeed");

    // Fresh model: some vocabulary, no merge rules yet
    const int initial_vocab = dpdk_bpe_get_vocab_size();
    TEST_ASSERT(initial_vocab > 0, "Initial vocabulary should have ASCII characters");

    const int initial_merges = dpdk_bpe_get_merge_count();
    TEST_ASSERT_EQ(initial_merges, 0, "Initial merge count should be 0");

    // Tear the model down, then confirm that queries are rejected
    dpdk_bpe_cleanup();

    const int vocab_after_cleanup = dpdk_bpe_get_vocab_size();
    TEST_ASSERT_EQ(vocab_after_cleanup, -1, "Operations after cleanup should fail");
}

/**
 * Register three merge rules and verify the model counts them.
 *
 * Each dpdk_bpe_add_merge() call is expected to return 0, and the merge
 * count reported afterwards is expected to be 3.
 */
void test_bpe_add_merge(void) {
    printf("\n=== Testing BPE Merge Rule Addition ===\n");

    // Start from a freshly initialized model
    const int init_status = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(init_status, 0, "BPE initialization should succeed");

    // Register the rules one at a time, checking each result
    const int th_status = dpdk_bpe_add_merge("t", "h", 100);
    TEST_ASSERT_EQ(th_status, 0, "Adding merge rule 't' + 'h' should succeed");

    const int the_status = dpdk_bpe_add_merge("th", "e", 200);
    TEST_ASSERT_EQ(the_status, 0, "Adding merge rule 'th' + 'e' should succeed");

    const int an_status = dpdk_bpe_add_merge("a", "n", 150);
    TEST_ASSERT_EQ(an_status, 0, "Adding merge rule 'a' + 'n' should succeed");

    // All three rules must be reflected in the count
    const int registered = dpdk_bpe_get_merge_count();
    TEST_ASSERT_EQ(registered, 3, "Merge count should be 3");

    dpdk_bpe_cleanup();
}

/**
 * Tokenize "hello" on a model with no merge rules.
 *
 * Expects five tokens; when that count is met, each token is compared
 * against the corresponding single character of the input.
 */
void test_bpe_tokenization_basic(void) {
    // Expected token text and the report message for each position
    static const struct {
        const char *text;
        const char *message;
    } want[] = {
        {"h", "First token should be 'h'"},
        {"e", "Second token should be 'e'"},
        {"l", "Third token should be 'l'"},
        {"l", "Fourth token should be 'l'"},
        {"o", "Fifth token should be 'o'"},
    };
    const int want_count = (int)(sizeof want / sizeof want[0]);

    printf("\n=== Testing Basic BPE Tokenization ===\n");

    // Start from a freshly initialized model
    const int init_status = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(init_status, 0, "BPE initialization should succeed");

    // With no merge rules the input is expected to come back one token per character
    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int token_ids[TEST_MAX_TOKENS];
    const int produced = dpdk_bpe_tokenize("hello", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(produced, 5, "Basic tokenization should produce 5 character tokens");

    // Only inspect the token text when the count is right
    if (produced == 5) {
        for (int i = 0; i < want_count; i++) {
            TEST_ASSERT_STR_EQ(tokens[i], want[i].text, want[i].message);
        }
    }

    dpdk_bpe_cleanup();
}

/**
 * Test BPE tokenization with merge rules.
 *
 * Registers the rules 't'+'h' and 'th'+'e', then expects "the" to collapse
 * into the single token "the" and "that" to become "th", "a", "t".
 * The result of each rule registration is asserted so that a setup failure
 * is reported as such instead of surfacing as a tokenization mismatch.
 */
void test_bpe_tokenization_with_merges(void) {
    printf("\n=== Testing BPE Tokenization with Merge Rules ===\n");

    // Initialize model
    int ret = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(ret, 0, "BPE initialization should succeed");

    // Add merge rules for "the"
    ret = dpdk_bpe_add_merge("t", "h", 100);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 't' + 'h' should succeed");

    ret = dpdk_bpe_add_merge("th", "e", 200);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'th' + 'e' should succeed");

    // Test tokenization: both rules apply, leaving one token
    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int token_ids[TEST_MAX_TOKENS];
    int num_tokens = dpdk_bpe_tokenize("the", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 1, "Tokenization of 'the' should produce 1 token");

    if (num_tokens == 1) {
        TEST_ASSERT_STR_EQ(tokens[0], "the", "Token should be 'the'");
    }

    // Test tokenization with partial merge: only 't'+'h' applies to "that"
    num_tokens = dpdk_bpe_tokenize("that", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 3, "Tokenization of 'that' should produce 3 tokens");

    if (num_tokens == 3) {
        TEST_ASSERT_STR_EQ(tokens[0], "th", "First token should be 'th'");
        TEST_ASSERT_STR_EQ(tokens[1], "a", "Second token should be 'a'");
        TEST_ASSERT_STR_EQ(tokens[2], "t", "Third token should be 't'");
    }

    dpdk_bpe_cleanup();
}

/**
 * Test BPE merge priority handling.
 *
 * Registers two overlapping rules, 'a'+'b' with value 100 and 'b'+'c' with
 * value 200, and tokenizes "abc". The expected result ("a", "bc") encodes
 * the assumption that the rule with the larger value is applied first.
 * The result of each rule registration is asserted so that a setup failure
 * is not mistaken for a priority-ordering bug.
 */
void test_bpe_merge_priority(void) {
    printf("\n=== Testing BPE Merge Priority ===\n");

    // Initialize model
    int ret = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(ret, 0, "BPE initialization should succeed");

    // Add merge rules with different priorities
    ret = dpdk_bpe_add_merge("a", "b", 100);  // Lower priority
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'a' + 'b' should succeed");

    ret = dpdk_bpe_add_merge("b", "c", 200);  // Higher priority
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'b' + 'c' should succeed");

    // Test tokenization - higher priority merge should be applied first
    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int token_ids[TEST_MAX_TOKENS];
    int num_tokens = dpdk_bpe_tokenize("abc", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 2, "Tokenization of 'abc' should produce 2 tokens");

    if (num_tokens == 2) {
        TEST_ASSERT_STR_EQ(tokens[0], "a", "First token should be 'a'");
        TEST_ASSERT_STR_EQ(tokens[1], "bc", "Second token should be 'bc'");
    }

    dpdk_bpe_cleanup();
}

/**
 * Test BPE error handling.
 *
 * First checks that tokenizing and adding a merge rule both return -1 while
 * the model is not initialized, then initializes the model and checks that
 * NULL text, NULL token buffer and max_tokens == 0 are each rejected with -1.
 *
 * Assumes the model is uninitialized on entry; main() runs this after tests
 * that each finish with dpdk_bpe_cleanup().
 */
void test_bpe_error_handling(void) {
    printf("\n=== Testing BPE Error Handling ===\n");

    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int token_ids[TEST_MAX_TOKENS];

    // Test operations without initialization. Valid buffers are passed on
    // purpose: with NULL/0 arguments a -1 could come from argument
    // validation and would not prove that the missing-init check works.
    int ret = dpdk_bpe_tokenize("test", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization without initialization should fail");

    ret = dpdk_bpe_add_merge("a", "b", 100);
    TEST_ASSERT_EQ(ret, -1, "Adding merge without initialization should fail");

    // Initialize model
    ret = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(ret, 0, "BPE initialization should succeed");

    // Test invalid parameters, one bad argument per call
    ret = dpdk_bpe_tokenize(NULL, tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with NULL text should fail");

    ret = dpdk_bpe_tokenize("test", NULL, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with NULL tokens should fail");

    ret = dpdk_bpe_tokenize("test", tokens, token_ids, 0);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with max_tokens=0 should fail");

    dpdk_bpe_cleanup();
}

/**
 * Test BPE statistics
 */
void test_bpe_statistics(void) {
    printf("\n=== Testing BPE Statistics ===\n");
    
    // Initialize model
    int ret = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(ret, 0, "BPE initialization should succeed");
    
    // Get initial statistics
    struct bpe_stats stats;
    ret = dpdk_bpe_get_stats(&stats);
    TEST_ASSERT_EQ(ret, 0, "Getting statistics should succeed");
    
    TEST_ASSERT(stats.vocab_size > 0, "Vocabulary size should be positive");
    TEST_ASSERT_EQ(stats.merge_count, 0, "Initial merge count should be 0");
    TEST_ASSERT(stats.token_pool_size > 0, "Token pool size should be positive");
    TEST_ASSERT(stats.token_pool_free > 0, "Token pool should have free entries");
    
    // Add some merges and check stats
    dpdk_bpe_add_merge("a", "b", 100);
    dpdk_bpe_add_merge("c", "d", 200);
    
    ret = dpdk_bpe_get_stats(&stats);
    TEST_ASSERT_EQ(ret, 0, "Getting statistics should succeed");
    TEST_ASSERT_EQ(stats.merge_count, 2, "Merge count should be 2");
    
    dpdk_bpe_cleanup();
}

/**
 * Check the failure path of merge-file loading.
 *
 * No real merge file is available to this test, so it only verifies that
 * asking for a file that does not exist returns -1.
 */
void test_bpe_file_loading(void) {
    printf("\n=== Testing BPE File Loading ===\n");

    // Start from a freshly initialized model
    const int init_status = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(init_status, 0, "BPE initialization should succeed");

    // A path that is not expected to exist must be reported as an error
    const int load_status = dpdk_bpe_load_merges_from_file("non_existent_file.txt", false);
    TEST_ASSERT_EQ(load_status, -1, "Loading non-existent file should fail");

    dpdk_bpe_cleanup();
}

/**
 * Test BPE complex tokenization scenarios.
 *
 * Registers five merge rules, then checks three inputs: "hello world" must
 * yield at least one token, the empty string must yield zero tokens, and
 * "a" must yield the single token "a". The result of each rule registration
 * is asserted so that setup failures are reported explicitly.
 */
void test_bpe_complex_scenarios(void) {
    printf("\n=== Testing BPE Complex Scenarios ===\n");

    // Initialize model
    int ret = dpdk_bpe_init(TEST_SOCKET_ID, BPE_MODEL_MODERNBERT);
    TEST_ASSERT_EQ(ret, 0, "BPE initialization should succeed");

    // Add comprehensive merge rules
    ret = dpdk_bpe_add_merge("h", "e", 100);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'h' + 'e' should succeed");

    ret = dpdk_bpe_add_merge("l", "l", 150);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'l' + 'l' should succeed");

    ret = dpdk_bpe_add_merge("he", "ll", 200);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'he' + 'll' should succeed");

    ret = dpdk_bpe_add_merge("o", " ", 50);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'o' + ' ' should succeed");

    ret = dpdk_bpe_add_merge("w", "o", 75);
    TEST_ASSERT_EQ(ret, 0, "Adding merge rule 'w' + 'o' should succeed");

    // Test complex tokenization
    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int token_ids[TEST_MAX_TOKENS];
    int num_tokens = dpdk_bpe_tokenize("hello world", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT(num_tokens > 0, "Complex tokenization should succeed");

    // Test empty string
    num_tokens = dpdk_bpe_tokenize("", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 0, "Empty string should produce 0 tokens");

    // Test single character (no registered rule involves 'a')
    num_tokens = dpdk_bpe_tokenize("a", tokens, token_ids, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 1, "Single character should produce 1 token");

    if (num_tokens == 1) {
        TEST_ASSERT_STR_EQ(tokens[0], "a", "Single character token should be 'a'");
    }

    dpdk_bpe_cleanup();
}

/**
 * Report the totals accumulated in the global test counters.
 *
 * Prints the run/passed/failed counts, the pass percentage (0.00 when no
 * test has run) and a one-line overall verdict.
 */
void print_test_summary(void) {
    // Guard the division: with nothing run the rate is reported as zero
    double success_rate = 0.0;
    if (tests_run > 0) {
        success_rate = 100.0 * tests_passed / tests_run;
    }

    const char *verdict = (tests_failed != 0) ? "Some tests FAILED!\n"
                                              : "All tests PASSED!\n";

    printf("\n=== BPE Test Summary ===\n");
    printf("Total tests run: %d\n", tests_run);
    printf("Tests passed: %d\n", tests_passed);
    printf("Tests failed: %d\n", tests_failed);
    printf("Success rate: %.2f%%\n", success_rate);
    fputs(verdict, stdout);
}

/**
 * Test driver entry point.
 *
 * Initializes the DPDK EAL with the process arguments, runs every BPE test
 * in a fixed order, prints the summary and cleans the EAL up again.
 *
 * @return EXIT_SUCCESS when no assertion failed; EXIT_FAILURE when EAL
 *         initialization failed or at least one assertion failed.
 */
int main(int argc, char *argv[]) {
    // Initialize DPDK EAL; diagnostics go to stderr so they are not mixed
    // into the PASS/FAIL report on stdout
    if (rte_eal_init(argc, argv) < 0) {
        fprintf(stderr, "Error: DPDK EAL initialization failed\n");
        return EXIT_FAILURE;
    }

    printf("Starting BPE Utility Tests...\n");

    // Run all tests
    test_bpe_init_cleanup();
    test_bpe_add_merge();
    test_bpe_tokenization_basic();
    test_bpe_tokenization_with_merges();
    test_bpe_merge_priority();
    test_bpe_error_handling();
    test_bpe_statistics();
    test_bpe_file_loading();
    test_bpe_complex_scenarios();

    // Print summary
    print_test_summary();

    // Cleanup DPDK
    rte_eal_cleanup();

    return tests_failed > 0 ? EXIT_FAILURE : EXIT_SUCCESS;
}
