import random
import csv
import string
from word_lists import (
    ENGLISH_WORDS, GERMAN_WORDS, DUTCH_WORDS, FRENCH_WORDS,
    POLISH_WORDS, HUNGARIAN_WORDS, SWAHILI_WORDS, JAPANESE_WORDS,
    TLDS, LEETSPEAK_WORDS, MISSPELLED_WORDS, SHUFFLED_ENGLISH_WORDS
)


def generate_random_alphanumeric(length):
    """Return a random string of ``length`` lowercase letters and digits.

    Every character is drawn independently with ``random.choice``. A
    ``length`` of zero or less produces an empty string.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def generate_random_domain(tlds, num_domains=10, max_length=20):
    """Build up to ``num_domains`` unique random alphanumeric domains.

    Each attempt joins a 3-15 character label produced by
    ``generate_random_alphanumeric`` with a TLD picked from ``tlds``. The
    result is kept only when its total length is at most ``max_length``.
    No more than ``2 * num_domains`` attempts are made, so fewer than
    ``num_domains`` domains may come back.

    Returns:
        A list of distinct domain strings in arbitrary (set) order.
    """
    unique_domains = set()  # duplicates collapse automatically
    attempts_left = num_domains * 2  # hard cap on the amount of work

    while attempts_left > 0 and len(unique_domains) < num_domains:
        attempts_left -= 1
        label = generate_random_alphanumeric(random.randint(3, 15))
        suffix = random.choice(tlds)
        candidate = f"{label}{suffix}"
        if len(candidate) > max_length:
            continue  # too long once the TLD is attached
        unique_domains.add(candidate)

    return list(unique_domains)


def generate_domain(word_list, tlds, num_domains=10, max_length=20):
    """Generate unique two-word domains from a word list and a set of TLDs.

    Each attempt picks a TLD, a first word of 3-6 characters, and a second
    word short enough that ``first + second + tld`` fits in ``max_length``
    characters. No more than ``2 * num_domains`` attempts are made, so fewer
    than ``num_domains`` domains may be returned.

    Args:
        word_list: Re-iterable collection of words to combine.
        tlds: Non-empty sequence of TLD suffixes; the chosen one is appended
            verbatim (no separator is inserted).
        num_domains: Maximum number of domains to return.
        max_length: Maximum total length of a domain, TLD included.

    Returns:
        A list of distinct domain strings in arbitrary (set) order. The list
        is empty when ``word_list`` contains no 3-6 character word.

    Raises:
        IndexError: If ``tlds`` is empty and at least one attempt is made.
    """
    # The pool of first words never changes between attempts: build it once.
    first_words = [w for w in word_list if 3 <= len(w) <= 6]
    if not first_words:
        return []

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Bound the work when duplicates pile up

    while len(domains) < num_domains and attempts < max_attempts:
        attempts += 1

        # Pick the TLD up front so its length can be charged to the budget.
        tld = random.choice(tlds)
        first_word = random.choice(first_words)

        # Characters left for the second word once the first word and the
        # TLD are accounted for.
        remaining_length = max_length - len(first_word) - len(tld)
        second_words = [w for w in word_list if len(w) <= remaining_length]
        if not second_words:
            continue  # nothing fits; the attempt is spent, try again

        second_word = random.choice(second_words)
        domain = f"{first_word}{second_word}{tld}"
        # Holds by construction for string inputs; kept as a cheap guard.
        if len(domain) <= max_length:
            domains.add(domain)

    return list(domains)


def generate_domain_without_tld(word_list, num_domains=10, max_length=20):
    """Generate unique two-word domain labels (no TLD) from a word list.

    Each attempt picks a first word of 3-6 characters and a second word
    short enough that the concatenation fits in ``max_length`` characters.
    No more than ``2 * num_domains`` attempts are made, so fewer than
    ``num_domains`` labels may be returned.

    Args:
        word_list: Re-iterable collection of words to combine.
        num_domains: Maximum number of labels to return.
        max_length: Maximum total length of a label.

    Returns:
        A list of distinct labels in arbitrary (set) order. The list is
        empty when ``word_list`` contains no 3-6 character word, or when no
        second word ever fits.
    """
    # The pool of first words never changes between attempts: build it once.
    first_words = [w for w in word_list if 3 <= len(w) <= 6]
    if not first_words:
        return []

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Bound the work when duplicates pile up

    while len(domains) < num_domains and attempts < max_attempts:
        attempts += 1
        first_word = random.choice(first_words)

        remaining_length = max_length - len(first_word)
        second_words = [w for w in word_list if len(w) <= remaining_length]
        if not second_words:
            continue  # nothing fits; the attempt is spent, try again

        second_word = random.choice(second_words)
        domain = f"{first_word}{second_word}"
        # Holds by construction for string inputs; kept as a cheap guard.
        if len(domain) <= max_length:
            domains.add(domain)

    return list(domains)


def generate_consonant_domain(num_domains=10, max_length=20, tlds=None):
    """Generate unique domains whose label is a random run of consonants.

    Each attempt builds a 3-15 character label from consonants, appends a
    randomly chosen TLD, and keeps the result only when its total length is
    at most ``max_length``. No more than ``2 * num_domains`` attempts are
    made, so fewer than ``num_domains`` domains may be returned.

    Args:
        num_domains: Maximum number of domains to return.
        max_length: Maximum total length of a domain, TLD included.
        tlds: Optional sequence of TLD suffixes to choose from. When
            ``None``, the module-level ``TLDS`` imported from ``word_lists``
            is used.

    Returns:
        A list of distinct domain strings in arbitrary (set) order.
    """
    if tlds is None:
        tlds = TLDS

    # Lowercase ASCII letters other than a, e, i, o, u ("y" is treated as a
    # consonant here).
    consonants = "bcdfghjklmnpqrstvwxyz"

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Set maximum attempts

    while len(domains) < num_domains and attempts < max_attempts:
        # Generate random length (3-15 characters)
        domain_length = random.randint(3, 15)
        domain_name = ''.join(
            random.choice(consonants) for _ in range(domain_length)
        )
        # Select TLD
        tld = random.choice(tlds)
        # Generate domain name
        domain = f"{domain_name}{tld}"
        if len(domain) <= max_length:
            domains.add(domain)
        attempts += 1

    return list(domains)


def generate_vowel_domain(num_domains=10, max_length=20, tlds=None):
    """Generate unique domains whose label is a random run of vowels.

    Each attempt builds a 3-15 character label from the letters
    ``a, e, i, o, u``, appends a randomly chosen TLD, and keeps the result
    only when its total length is at most ``max_length``. No more than
    ``2 * num_domains`` attempts are made, so fewer than ``num_domains``
    domains may be returned.

    Args:
        num_domains: Maximum number of domains to return.
        max_length: Maximum total length of a domain, TLD included.
        tlds: Optional sequence of TLD suffixes to choose from. When
            ``None``, the module-level ``TLDS`` imported from ``word_lists``
            is used.

    Returns:
        A list of distinct domain strings in arbitrary (set) order.
    """
    if tlds is None:
        tlds = TLDS

    vowels = "aeiou"

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Set maximum attempts

    while len(domains) < num_domains and attempts < max_attempts:
        # Generate random length (3-15 characters)
        domain_length = random.randint(3, 15)
        domain_name = ''.join(
            random.choice(vowels) for _ in range(domain_length)
        )
        # Select TLD
        tld = random.choice(tlds)
        # Generate domain name
        domain = f"{domain_name}{tld}"
        if len(domain) <= max_length:
            domains.add(domain)
        attempts += 1

    return list(domains)


def generate_numeric_domain(num_domains=10, max_length=20, tlds=None):
    """Generate unique domains whose label is a random run of digits.

    Each attempt builds a 3-15 character label from the digits ``0-9``,
    appends a randomly chosen TLD, and keeps the result only when its total
    length is at most ``max_length``. No more than ``2 * num_domains``
    attempts are made, so fewer than ``num_domains`` domains may be
    returned.

    Args:
        num_domains: Maximum number of domains to return.
        max_length: Maximum total length of a domain, TLD included.
        tlds: Optional sequence of TLD suffixes to choose from. When
            ``None``, the module-level ``TLDS`` imported from ``word_lists``
            is used.

    Returns:
        A list of distinct domain strings in arbitrary (set) order.
    """
    if tlds is None:
        tlds = TLDS

    digits = string.digits

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Set maximum attempts

    while len(domains) < num_domains and attempts < max_attempts:
        # Generate random length (3-15 characters)
        domain_length = random.randint(3, 15)
        domain_name = ''.join(
            random.choice(digits) for _ in range(domain_length)
        )
        # Select TLD
        tld = random.choice(tlds)
        # Generate domain name
        domain = f"{domain_name}{tld}"
        if len(domain) <= max_length:
            domains.add(domain)
        attempts += 1

    return list(domains)


def generate_random_alphabetic_domain(num_domains=10, max_length=20, tlds=None):
    """Generate unique domains whose label is random lowercase letters.

    Each attempt builds a 3-15 character label from ``a-z``, appends a
    randomly chosen TLD, and keeps the result only when its total length is
    at most ``max_length``. No more than ``2 * num_domains`` attempts are
    made, so fewer than ``num_domains`` domains may be returned.

    Args:
        num_domains: Maximum number of domains to return.
        max_length: Maximum total length of a domain, TLD included.
        tlds: Optional sequence of TLD suffixes to choose from. When
            ``None``, the module-level ``TLDS`` imported from ``word_lists``
            is used.

    Returns:
        A list of distinct domain strings in arbitrary (set) order.
    """
    if tlds is None:
        tlds = TLDS

    domains = set()  # Use set to prevent duplicates
    attempts = 0
    max_attempts = num_domains * 2  # Set maximum attempts

    while len(domains) < num_domains and attempts < max_attempts:
        # Generate random length (3-15 characters)
        domain_length = random.randint(3, 15)
        domain_name = ''.join(
            random.choice(string.ascii_lowercase)
            for _ in range(domain_length)
        )
        # Select TLD
        tld = random.choice(tlds)
        # Generate domain name
        domain = f"{domain_name}{tld}"
        if len(domain) <= max_length:
            domains.add(domain)
        attempts += 1

    return list(domains)


def save_domains_to_csv(domains, filename):
    """Write the generated domains to a one-column CSV file.

    The file is created (or overwritten) as UTF-8. It starts with a
    ``domain`` header row followed by one row per entry of ``domains``, in
    iteration order.

    Args:
        domains: Iterable of domain strings to write.
        filename: Path of the CSV file to create. The file is opened with
            ``open(..., 'w')``, so its parent directory must already exist.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['domain'])
        # One single-cell row per domain.
        writer.writerows([domain] for domain in domains)


def main():
    """Generate every domain dataset and write each one to its own CSV file.

    First one two-word, TLD-suffixed dataset is produced per word list, then
    the TLD-less English set, then the consonant, vowel, numeric,
    alphanumeric and alphabetic random sets. Each generator is asked for
    5000 domains. After each file is written, a one-line summary with the
    actual count is printed. All files go under ``data_generalization/``;
    this function does not create that directory.
    """
    word_lists_by_name = {
        'english': ENGLISH_WORDS,
        'german': GERMAN_WORDS,
        'dutch': DUTCH_WORDS,
        'french': FRENCH_WORDS,
        'polish': POLISH_WORDS,
        'hungarian': HUNGARIAN_WORDS,
        'swahili': SWAHILI_WORDS,
        'japanese': JAPANESE_WORDS,
        'leetspeak': LEETSPEAK_WORDS,
        'missspelled': MISSPELLED_WORDS,
        'shuffled_english': SHUFFLED_ENGLISH_WORDS
    }

    def _store(batch, filename, label):
        # Persist one batch, then report how many domains it holds.
        save_domains_to_csv(batch, filename)
        print(f"Generated {len(batch)} {label} and saved to {filename}")

    # Word-based datasets: one per language / word-list variant.
    for lang, word_list in word_lists_by_name.items():
        _store(
            generate_domain(word_list, TLDS, num_domains=5000),
            f"data_generalization/{lang}_domains.csv",
            f"{lang} domains with TLD",
        )

    # Remaining datasets as (generator, output path, summary label), run in
    # exactly this order.
    jobs = (
        (
            lambda: generate_domain_without_tld(ENGLISH_WORDS, num_domains=5000),
            "data_generalization/english_domains_without_tld.csv",
            "english domains without TLD",
        ),
        (
            lambda: generate_consonant_domain(num_domains=5000),
            "data_generalization/consonant_random_domains.csv",
            "consonant random domains",
        ),
        (
            lambda: generate_vowel_domain(num_domains=5000),
            "data_generalization/vowel_random_domains.csv",
            "vowel random domains",
        ),
        (
            lambda: generate_numeric_domain(num_domains=5000),
            "data_generalization/numeric_random_domains.csv",
            "numeric random domains",
        ),
        (
            lambda: generate_random_domain(TLDS, num_domains=5000),
            "data_generalization/alphanumeric_domains.csv",
            "random domains",
        ),
        (
            lambda: generate_random_alphabetic_domain(num_domains=5000),
            "data_generalization/alphabetic_domains.csv",
            "alphabetic domains",
        ),
    )
    for build, filename, label in jobs:
        _store(build(), filename, label)


# Run the full generation pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
