#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include "../tokenizer/wordpiece_util.h"

// Test configuration
// TEST_SOCKET_ID is the value every test passes to dpdk_wordpiece_init()
// (presumably the NUMA socket to allocate from; confirm against the header).
// TEST_MAX_TOKENS is the row count of each test's token output buffer and the
// max_tokens argument handed to dpdk_wordpiece_tokenize().
#define TEST_SOCKET_ID 0
#define TEST_MAX_TOKENS 100

// Global test counters.
// Every TEST_ASSERT* invocation increments tests_run and exactly one of
// tests_passed / tests_failed.
static int tests_run = 0;
static int tests_passed = 0;
static int tests_failed = 0;

// Test helper macros.
//
// Each helper records one result and prints a PASS/FAIL line to stdout. None
// of them abort, so a failing check never prevents later checks from running.
// Value arguments are captured in locals first, so every argument expression
// is evaluated exactly once even when it has side effects.

/* Record a boolean check: passes when `condition` is non-zero. */
#define TEST_ASSERT(condition, message) \
    do { \
        const char *test_assert_msg_ = (message); \
        tests_run++; \
        if (condition) { \
            tests_passed++; \
            printf("PASS: %s\n", test_assert_msg_); \
        } else { \
            tests_failed++; \
            printf("FAIL: %s\n", test_assert_msg_); \
        } \
    } while (0)

/*
 * Record an integer equality check.
 * Both operands are converted to long long before being compared and printed,
 * so any integer type up to that width can be passed without a printf
 * format/argument mismatch.
 */
#define TEST_ASSERT_EQ(actual, expected, message) \
    do { \
        const long long test_eq_actual_ = (long long)(actual); \
        const long long test_eq_expected_ = (long long)(expected); \
        const char *test_eq_msg_ = (message); \
        tests_run++; \
        if (test_eq_actual_ == test_eq_expected_) { \
            tests_passed++; \
            printf("PASS: %s (expected: %lld, actual: %lld)\n", \
                   test_eq_msg_, test_eq_expected_, test_eq_actual_); \
        } else { \
            tests_failed++; \
            printf("FAIL: %s (expected: %lld, actual: %lld)\n", \
                   test_eq_msg_, test_eq_expected_, test_eq_actual_); \
        } \
    } while (0)

/*
 * Record a string equality check.
 * A NULL operand is reported as a failure (printed as "(null)") instead of
 * being handed to strcmp()/printf("%s"), which would be undefined behaviour.
 */
#define TEST_ASSERT_STR_EQ(actual, expected, message) \
    do { \
        const char *test_str_actual_ = (actual); \
        const char *test_str_expected_ = (expected); \
        const char *test_str_msg_ = (message); \
        tests_run++; \
        if (test_str_actual_ != NULL && test_str_expected_ != NULL && \
            strcmp(test_str_actual_, test_str_expected_) == 0) { \
            tests_passed++; \
            printf("PASS: %s\n", test_str_msg_); \
        } else { \
            tests_failed++; \
            printf("FAIL: %s (expected: '%s', actual: '%s')\n", test_str_msg_, \
                   test_str_expected_ != NULL ? test_str_expected_ : "(null)", \
                   test_str_actual_ != NULL ? test_str_actual_ : "(null)"); \
        } \
    } while (0)

/**
 * Exercise the init -> query -> cleanup life cycle of the WordPiece model.
 *
 * Expects a freshly initialised model to report a vocabulary of one entry and
 * the vocabulary-size query to report -1 once the model has been cleaned up.
 */
void test_wordpiece_init_cleanup(void) {
    printf("\n=== Testing WordPiece Initialization and Cleanup ===\n");

    // Bring the model up.
    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    // Query the vocabulary while the model is live.
    const int size_after_init = dpdk_wordpiece_get_vocab_size();
    TEST_ASSERT_EQ(size_after_init, 1, "Initial vocabulary should have UNK token");

    // Tear the model down, then repeat the same query.
    dpdk_wordpiece_cleanup();

    const int size_after_cleanup = dpdk_wordpiece_get_vocab_size();
    TEST_ASSERT_EQ(size_after_cleanup, -1, "Operations after cleanup should fail");
}

/**
 * Check that tokens can be added to the vocabulary.
 *
 * Adds three tokens, expects the vocabulary size to become 4, and expects a
 * second insertion of "hello" to return the ID already reported by the
 * token-to-ID lookup.
 */
void test_wordpiece_add_token(void) {
    // Tokens to insert, paired with the message reported for each insertion.
    static const struct {
        const char *token;
        const char *message;
    } additions[] = {
        { "hello", "Adding token 'hello' should succeed" },
        { "world", "Adding token 'world' should succeed" },
        { "##ing", "Adding token '##ing' should succeed" },
    };

    printf("\n=== Testing WordPiece Token Addition ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    for (size_t i = 0; i < sizeof additions / sizeof additions[0]; i++) {
        const int new_id = dpdk_wordpiece_add_token(additions[i].token);
        TEST_ASSERT(new_id >= 0, additions[i].message);
    }

    const int size_after_adds = dpdk_wordpiece_get_vocab_size();
    TEST_ASSERT_EQ(size_after_adds, 4, "Vocabulary size should be 4 (including UNK)");

    // Look the ID up first, then insert the same token a second time.
    const int existing_id = dpdk_wordpiece_get_id_by_token("hello");
    const int duplicate_id = dpdk_wordpiece_add_token("hello");
    TEST_ASSERT_EQ(duplicate_id, existing_id, "Adding duplicate token should return existing ID");

    dpdk_wordpiece_cleanup();
}

/**
 * Test WordPiece token lookup functions.
 *
 * Covers both directions (token -> ID and ID -> token) for three inserted
 * tokens, plus the failure results for an unknown token (-1) and an unknown
 * ID (NULL).
 *
 * The ID -> token lookup can return NULL (this test itself expects that for
 * an invalid ID), so every returned pointer is checked for NULL before it is
 * compared as a string.
 */
void test_wordpiece_token_lookup(void) {
    printf("\n=== Testing WordPiece Token Lookup ===\n");

    // Initialize model
    int ret = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(ret, 0, "WordPiece initialization should succeed");

    // Add some tokens
    int hello_id = dpdk_wordpiece_add_token("hello");
    int world_id = dpdk_wordpiece_add_token("world");
    int subword_id = dpdk_wordpiece_add_token("##ing");

    // If an insertion failed, the ID would be negative and the lookups below
    // could match it vacuously (-1 == -1), so verify the fixtures first.
    TEST_ASSERT(hello_id >= 0 && world_id >= 0 && subword_id >= 0,
                "Adding lookup fixture tokens should succeed");

    // Test token ID lookup
    int found_id = dpdk_wordpiece_get_id_by_token("hello");
    TEST_ASSERT_EQ(found_id, hello_id, "Token ID lookup for 'hello' should match");

    found_id = dpdk_wordpiece_get_id_by_token("world");
    TEST_ASSERT_EQ(found_id, world_id, "Token ID lookup for 'world' should match");

    found_id = dpdk_wordpiece_get_id_by_token("##ing");
    TEST_ASSERT_EQ(found_id, subword_id, "Token ID lookup for '##ing' should match");

    // Test token string lookup
    const char *token_str = dpdk_wordpiece_get_token_by_id(hello_id);
    TEST_ASSERT(token_str != NULL, "Token string lookup for 'hello' should not return NULL");
    if (token_str != NULL) {
        TEST_ASSERT_STR_EQ(token_str, "hello", "Token string lookup should return 'hello'");
    }

    token_str = dpdk_wordpiece_get_token_by_id(world_id);
    TEST_ASSERT(token_str != NULL, "Token string lookup for 'world' should not return NULL");
    if (token_str != NULL) {
        TEST_ASSERT_STR_EQ(token_str, "world", "Token string lookup should return 'world'");
    }

    token_str = dpdk_wordpiece_get_token_by_id(subword_id);
    TEST_ASSERT(token_str != NULL, "Token string lookup for '##ing' should not return NULL");
    if (token_str != NULL) {
        TEST_ASSERT_STR_EQ(token_str, "##ing", "Token string lookup should return '##ing'");
    }

    // Test lookup of non-existent token
    found_id = dpdk_wordpiece_get_id_by_token("nonexistent");
    TEST_ASSERT_EQ(found_id, -1, "Lookup of non-existent token should return -1");

    token_str = dpdk_wordpiece_get_token_by_id(9999);
    TEST_ASSERT(token_str == NULL, "Lookup of invalid ID should return NULL");

    dpdk_wordpiece_cleanup();
}

/**
 * Tokenize a two-word input whose words are both in the vocabulary.
 *
 * Expects "hello world" to yield exactly the tokens "hello" and "world".
 */
void test_wordpiece_tokenization_basic(void) {
    static const char *const vocabulary[] = { "hello", "world" };

    printf("\n=== Testing Basic WordPiece Tokenization ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    // Populate the vocabulary; insertion results are not checked here.
    for (size_t i = 0; i < sizeof vocabulary / sizeof vocabulary[0]; i++) {
        dpdk_wordpiece_add_token(vocabulary[i]);
    }

    char pieces[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    const int piece_count = dpdk_wordpiece_tokenize("hello world", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Simple tokenization should produce 2 tokens");

    // Only inspect the buffer when the count says both rows were written.
    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], "hello", "First token should be 'hello'");
        TEST_ASSERT_STR_EQ(pieces[1], "world", "Second token should be 'world'");
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Tokenize words that must be split into a stem plus a "##" continuation.
 *
 * Expects "playing" -> "play" + "##ing" and "running" -> "run" + "##ning".
 */
void test_wordpiece_tokenization_subwords(void) {
    static const char *const vocabulary[] = {
        "play", "##ing", "##ed", "run", "##ning",
    };

    printf("\n=== Testing WordPiece Tokenization with Subwords ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    for (size_t i = 0; i < sizeof vocabulary / sizeof vocabulary[0]; i++) {
        dpdk_wordpiece_add_token(vocabulary[i]);
    }

    char pieces[TEST_MAX_TOKENS][MAX_TOKEN_LEN];

    // Case 1: "playing"
    int piece_count = dpdk_wordpiece_tokenize("playing", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Subword tokenization should produce 2 tokens");
    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], "play", "First token should be 'play'");
        TEST_ASSERT_STR_EQ(pieces[1], "##ing", "Second token should be '##ing'");
    }

    // Case 2: "running" (the same buffer is reused)
    piece_count = dpdk_wordpiece_tokenize("running", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Subword tokenization should produce 2 tokens");
    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], "run", "First token should be 'run'");
        TEST_ASSERT_STR_EQ(pieces[1], "##ning", "Second token should be '##ning'");
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Check how words missing from the vocabulary are reported.
 *
 * With only "hello" and "world" known, expects each unknown word to come
 * back as UNK_TOKEN while known words are returned unchanged.
 */
void test_wordpiece_unknown_tokens(void) {
    printf("\n=== Testing WordPiece Unknown Token Handling ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    // Deliberately small vocabulary.
    dpdk_wordpiece_add_token("hello");
    dpdk_wordpiece_add_token("world");

    char pieces[TEST_MAX_TOKENS][MAX_TOKEN_LEN];

    // One known word followed by one unknown word.
    int piece_count = dpdk_wordpiece_tokenize("hello unknown", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Unknown word tokenization should produce 2 tokens");
    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], "hello", "First token should be 'hello'");
        TEST_ASSERT_STR_EQ(pieces[1], UNK_TOKEN, "Second token should be UNK token");
    }

    // Two unknown words.
    piece_count = dpdk_wordpiece_tokenize("xyz abc", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Unknown text should produce 2 UNK tokens");
    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], UNK_TOKEN, "First token should be UNK token");
        TEST_ASSERT_STR_EQ(pieces[1], UNK_TOKEN, "Second token should be UNK token");
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Check that the longest matching continuation piece wins.
 *
 * The vocabulary offers both "##able" and the shorter "##abl" + "##e";
 * "unable" is expected to tokenize as "un" + "##able".
 */
void test_wordpiece_greedy_matching(void) {
    // Overlapping continuation pieces: "##able" vs. "##abl" followed by "##e".
    static const char *const vocabulary[] = { "un", "##able", "##abl", "##e" };

    printf("\n=== Testing WordPiece Greedy Longest Match ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    for (size_t i = 0; i < sizeof vocabulary / sizeof vocabulary[0]; i++) {
        dpdk_wordpiece_add_token(vocabulary[i]);
    }

    char pieces[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    const int piece_count = dpdk_wordpiece_tokenize("unable", pieces, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(piece_count, 2, "Greedy matching should produce 2 tokens");

    if (piece_count == 2) {
        TEST_ASSERT_STR_EQ(pieces[0], "un", "First token should be 'un'");
        TEST_ASSERT_STR_EQ(pieces[1], "##able", "Second token should be '##able' (longer match)");
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Test WordPiece error handling.
 *
 * Part 1 calls the API before dpdk_wordpiece_init(); this relies on the model
 * not being initialized on entry (the earlier tests in this file each end
 * with dpdk_wordpiece_cleanup()).
 * Part 2 initializes the model and passes one invalid argument at a time.
 *
 * Every call is expected to return -1.
 */
void test_wordpiece_error_handling(void) {
    printf("\n=== Testing WordPiece Error Handling ===\n");

    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];

    // Test operations without initialization.
    // The arguments are otherwise valid on purpose: with a NULL buffer or a
    // zero capacity, a -1 could be produced by argument validation and this
    // check would pass even if the "not initialized" guard were missing.
    int ret = dpdk_wordpiece_tokenize("test", tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization without initialization should fail");

    ret = dpdk_wordpiece_add_token("test");
    TEST_ASSERT_EQ(ret, -1, "Adding token without initialization should fail");

    // Initialize model
    ret = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(ret, 0, "WordPiece initialization should succeed");

    // Test invalid parameters, one at a time
    ret = dpdk_wordpiece_tokenize(NULL, tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with NULL text should fail");

    ret = dpdk_wordpiece_tokenize("test", NULL, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with NULL tokens should fail");

    ret = dpdk_wordpiece_tokenize("test", tokens, 0);
    TEST_ASSERT_EQ(ret, -1, "Tokenization with max_tokens=0 should fail");

    ret = dpdk_wordpiece_add_token(NULL);
    TEST_ASSERT_EQ(ret, -1, "Adding NULL token should fail");

    dpdk_wordpiece_cleanup();
}

/**
 * Test WordPiece statistics
 */
void test_wordpiece_statistics(void) {
    printf("\n=== Testing WordPiece Statistics ===\n");
    
    // Initialize model
    int ret = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(ret, 0, "WordPiece initialization should succeed");
    
    // Get initial statistics
    struct wordpiece_stats stats;
    ret = dpdk_wordpiece_get_stats(&stats);
    TEST_ASSERT_EQ(ret, 0, "Getting statistics should succeed");
    
    TEST_ASSERT_EQ(stats.vocab_size, 1, "Initial vocabulary size should be 1 (UNK)");
    TEST_ASSERT_EQ(stats.unk_token_id, 0, "UNK token ID should be 0");
    TEST_ASSERT(stats.token_pool_size > 0, "Token pool size should be positive");
    TEST_ASSERT(stats.token_pool_free > 0, "Token pool should have free entries");
    TEST_ASSERT(stats.word_pool_size > 0, "Word pool size should be positive");
    TEST_ASSERT(stats.word_pool_free > 0, "Word pool should have free entries");
    TEST_ASSERT(stats.hash_entries >= 0, "Hash entries should be non-negative");
    
    // Add some tokens and check stats
    dpdk_wordpiece_add_token("hello");
    dpdk_wordpiece_add_token("world");
    dpdk_wordpiece_add_token("##ing");
    
    ret = dpdk_wordpiece_get_stats(&stats);
    TEST_ASSERT_EQ(ret, 0, "Getting statistics should succeed");
    TEST_ASSERT_EQ(stats.vocab_size, 4, "Vocabulary size should be 4");
    TEST_ASSERT(stats.hash_entries > 0, "Hash entries should be positive");
    
    dpdk_wordpiece_cleanup();
}

/**
 * Check vocabulary loading from a file.
 *
 * No vocabulary file is used by this test, so only the failure path is
 * covered: loading a path that does not exist is expected to return -1.
 */
void test_wordpiece_file_loading(void) {
    printf("\n=== Testing WordPiece File Loading ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    // Point the loader at a file name that is not expected to exist.
    const int load_status = dpdk_wordpiece_load_vocab_from_file("non_existent_vocab.txt");
    TEST_ASSERT_EQ(load_status, -1, "Loading non-existent file should fail");

    dpdk_wordpiece_cleanup();
}

/**
 * Test WordPiece complex tokenization scenarios.
 *
 * Covers a multi-word sentence containing one word that must be split, the
 * empty string, a single split word, and input with leading, trailing and
 * repeated spaces.
 *
 * The sentence and single-word cases pin the exact expected pieces: with the
 * vocabulary below, greedy longest-match segmentation (the behaviour the
 * other tokenization tests in this file already expect) gives
 * "jumps" -> "jump" + "##s" and "jumping" -> "jump" + "##ing".
 */
void test_wordpiece_complex_scenarios(void) {
    static const char *const vocabulary[] = {
        "the", "quick", "brown", "fox", "jump", "##s", "##ed", "##ing", "over",
    };
    // Expected pieces for "the quick brown fox jumps over".
    static const char *const sentence_pieces[] = {
        "the", "quick", "brown", "fox", "jump", "##s", "over",
    };
    const int sentence_piece_count =
        (int)(sizeof sentence_pieces / sizeof sentence_pieces[0]);

    printf("\n=== Testing WordPiece Complex Scenarios ===\n");

    // Initialize model
    int ret = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(ret, 0, "WordPiece initialization should succeed");

    // Add comprehensive vocabulary
    for (size_t i = 0; i < sizeof vocabulary / sizeof vocabulary[0]; i++) {
        dpdk_wordpiece_add_token(vocabulary[i]);
    }

    // Test complex sentence
    char tokens[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    int num_tokens = dpdk_wordpiece_tokenize("the quick brown fox jumps over", tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, sentence_piece_count, "Complex sentence should produce 7 tokens");

    // Only inspect the buffer when the count matches, so no unwritten row is read.
    if (num_tokens == sentence_piece_count) {
        for (int i = 0; i < sentence_piece_count; i++) {
            TEST_ASSERT_STR_EQ(tokens[i], sentence_pieces[i],
                               "Complex sentence token should match expected piece");
        }
    }

    // Test empty string
    num_tokens = dpdk_wordpiece_tokenize("", tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 0, "Empty string should produce 0 tokens");

    // Test single word
    num_tokens = dpdk_wordpiece_tokenize("jumping", tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 2, "Single word 'jumping' should produce 2 tokens");

    if (num_tokens == 2) {
        TEST_ASSERT_STR_EQ(tokens[0], "jump", "First token should be 'jump'");
        TEST_ASSERT_STR_EQ(tokens[1], "##ing", "Second token should be '##ing'");
    }

    // Test whitespace handling
    num_tokens = dpdk_wordpiece_tokenize("  the   quick  ", tokens, TEST_MAX_TOKENS);
    TEST_ASSERT_EQ(num_tokens, 2, "Whitespace should be handled correctly");

    if (num_tokens == 2) {
        TEST_ASSERT_STR_EQ(tokens[0], "the", "First token should be 'the'");
        TEST_ASSERT_STR_EQ(tokens[1], "quick", "Second token should be 'quick'");
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Check token counts for very short input and unusual separators.
 *
 * Only the returned counts are verified; the contents of the token buffer
 * are not inspected. The "##b" vocabulary entry is added, but none of the
 * inputs below contains a 'b'.
 */
void test_wordpiece_edge_cases(void) {
    // Input text, expected token count, and message reported for that case.
    static const struct {
        const char *text;
        int expected_count;
        const char *message;
    } cases[] = {
        { "a",       1, "Single character should produce 1 token" },
        { "a     a", 2, "Multiple spaces should be treated as single separator" },
        { "a\t\na",  2, "Tabs and newlines should be treated as separators" },
    };

    printf("\n=== Testing WordPiece Edge Cases ===\n");

    const int init_status = dpdk_wordpiece_init(TEST_SOCKET_ID);
    TEST_ASSERT_EQ(init_status, 0, "WordPiece initialization should succeed");

    dpdk_wordpiece_add_token("a");
    dpdk_wordpiece_add_token("##b");

    char pieces[TEST_MAX_TOKENS][MAX_TOKEN_LEN];
    for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
        const int piece_count = dpdk_wordpiece_tokenize(cases[i].text, pieces, TEST_MAX_TOKENS);
        TEST_ASSERT_EQ(piece_count, cases[i].expected_count, cases[i].message);
    }

    dpdk_wordpiece_cleanup();
}

/**
 * Print the totals gathered in the global test counters.
 *
 * Reports the run/passed/failed counts, the pass percentage (0.00 when no
 * check has run, which avoids dividing by zero) and a one-line verdict.
 */
void print_test_summary(void) {
    double success_rate = 0.0;
    if (tests_run > 0) {
        success_rate = 100.0 * tests_passed / tests_run;
    }

    printf("\n=== WordPiece Test Summary ===\n");
    printf("Total tests run: %d\n", tests_run);
    printf("Tests passed: %d\n", tests_passed);
    printf("Tests failed: %d\n", tests_failed);
    printf("Success rate: %.2f%%\n", success_rate);

    printf("%s\n", tests_failed == 0 ? "All tests PASSED!" : "Some tests FAILED!");
}

/**
 * Test program entry point.
 *
 * Initializes the DPDK EAL with the command-line arguments, runs every test
 * function in a fixed order, prints the summary and shuts the EAL down.
 *
 * Returns -1 when EAL initialization fails, 1 when at least one check
 * failed, and 0 otherwise.
 */
int main(int argc, char *argv[]) {
    // Nothing can run without the EAL, so bail out before any test starts.
    if (rte_eal_init(argc, argv) < 0) {
        printf("Error: DPDK EAL initialization failed\n");
        return -1;
    }

    printf("Starting WordPiece Utility Tests...\n");

    // Model life cycle and vocabulary management
    test_wordpiece_init_cleanup();
    test_wordpiece_add_token();
    test_wordpiece_token_lookup();

    // Tokenization behaviour
    test_wordpiece_tokenization_basic();
    test_wordpiece_tokenization_subwords();
    test_wordpiece_unknown_tokens();
    test_wordpiece_greedy_matching();

    // Error paths, statistics and file loading
    test_wordpiece_error_handling();
    test_wordpiece_statistics();
    test_wordpiece_file_loading();

    // Larger scenarios and edge cases
    test_wordpiece_complex_scenarios();
    test_wordpiece_edge_cases();

    print_test_summary();

    rte_eal_cleanup();

    if (tests_failed > 0) {
        return 1;
    }
    return 0;
}
