import random

class GDMBase:
    """Random token-level perturbations of a token-id sequence.

    The wrapped ``tokenizer`` must expose ``vocab_size`` (used as the
    exclusive upper bound for random token ids). ``casing_flips``
    additionally calls ``tokenizer.decode(ids, skip_special_tokens=True)``
    and ``tokenizer(text, add_special_tokens=False)['input_ids']``.
    NOTE(review): this looks like the Hugging Face tokenizer interface --
    confirm against the caller.

    All randomness comes from the module-level ``random`` generator, so
    results are reproducible after ``random.seed(...)``.
    """

    def __init__(self, tokenizer):
        """Store the tokenizer used for vocabulary size and (de)tokenization."""
        self.tokenizer = tokenizer

    def _random_token(self):
        """Return a token id drawn uniformly from ``[0, vocab_size - 1]``."""
        return random.randint(0, self.tokenizer.vocab_size - 1)

    def chunking(self, x, c, l):
        """Keep one randomly chosen chunk of ``x`` and randomize the rest.

        Candidate chunk starts are ``range(0, len(x) - l + 1, c - l)``; one
        is picked uniformly. Tokens in ``[start, start + c)`` (clipped to the
        sequence end) are kept; every other position receives a random
        token id.

        Args:
            x: Indexable sequence of token ids.
            c: Chunk length.
            l: Overlap between consecutive candidate chunks; must be
                smaller than ``c`` so the stride ``c - l`` is positive.

        Returns:
            A new list of ``len(x)`` token ids.

        Raises:
            ValueError: If ``c <= l`` or no candidate start exists for the
                given sequence length.
        """
        n = len(x)
        stride = c - l

        # A zero stride makes range() fail with an unrelated message, and a
        # negative one can produce negative start positions (IndexError or
        # wrong tokens kept), so reject both up front.
        if stride <= 0:
            raise ValueError("Invalid chunk/overlap settings for the input sequence length.")

        # NOTE(review): with l == 0 the stop bound admits start == n, which
        # keeps no tokens at all -- confirm whether that is intended.
        positions = range(0, n - l + 1, stride)
        if not positions:
            raise ValueError("Invalid chunk/overlap settings for the input sequence length.")

        start = random.choice(positions)
        end = min(start + c, n)

        # Decide keep/replace by index rather than by a sentinel value, so
        # no token id inside the kept chunk can be mistaken for a hole.
        return [
            x[i] if start <= i < end else self._random_token()
            for i in range(n)
        ]

    def token_dropouts(self, x, d=2, option='deterministic'):
        """Replace roughly one in ``d`` tokens of ``x`` with random token ids.

        Args:
            x: Token ids; must provide ``.copy()`` and item assignment.
                The input itself is not modified.
            d: Dropout period. Must be positive (and an integer for the
                ``'deterministic'`` option).
            option: ``'deterministic'`` replaces every ``d``-th token
                starting from a random offset in ``[0, d - 1]``;
                ``'randomized'`` replaces each token independently with
                probability ``1 / d``.

        Returns:
            A copy of ``x`` with the selected positions overwritten.

        Raises:
            ValueError: If ``d`` is not positive or ``option`` is unknown.
        """
        if d <= 0:
            raise ValueError(f"d must be positive, got {d!r}.")
        if option not in ('deterministic', 'randomized'):
            # Previously an unknown option silently returned an unmodified
            # copy, which hides typos in the caller's configuration.
            raise ValueError(
                f"Unknown option {option!r}; expected 'deterministic' or 'randomized'."
            )

        tokens = x.copy()
        n = len(tokens)

        if option == 'deterministic':
            offset = random.randint(0, d - 1)
            for i in range(offset, n, d):
                tokens[i] = self._random_token()
        else:
            drop_prob = 1 / d
            for i in range(n):
                if random.random() < drop_prob:
                    tokens[i] = self._random_token()
        return tokens

    def casing_flips(self, x, p=0.9):
        """Flip the case of alphabetic characters and re-tokenize.

        ``x`` is decoded to text (special tokens skipped), each alphabetic
        character has its case flipped with probability ``p``, and the
        result is encoded again without special tokens.

        Args:
            x: Token ids accepted by ``tokenizer.decode``.
            p: Per-character flip probability.

        Returns:
            The ``'input_ids'`` produced by the tokenizer for the flipped
            text; its length may differ from ``len(x)``.
        """
        decoded_text = self.tokenizer.decode(x, skip_special_tokens=True)

        flipped_chars = []
        for ch in decoded_text:
            # Randomness is consumed only for alphabetic characters.
            if ch.isalpha() and random.random() < p:
                ch = ch.upper() if ch.islower() else ch.lower()
            flipped_chars.append(ch)

        flipped_text = ''.join(flipped_chars)
        return self.tokenizer(flipped_text, add_special_tokens=False)['input_ids']