import torch


def encode(tokenizer, text, split_digit: bool = True):
    """Tokenize *text* and return its token ids as a 1-D ``torch.long`` tensor.

    Args:
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_ids(tokens)``.
        text: The string to encode.
        split_digit: When true, every token for which ``str.isdigit()`` holds
            is replaced by its individual characters before the id lookup.

    Returns:
        A ``torch.long`` tensor built from the ids returned by
        ``tokenizer.convert_tokens_to_ids``.
    """
    pieces = []
    for piece in tokenizer.tokenize(text):
        if piece.isdigit() and split_digit:
            # Break an all-digit token into one piece per character.
            pieces.extend(piece)
        else:
            pieces.append(piece)
    ids = tokenizer.convert_tokens_to_ids(pieces)
    return torch.tensor(ids, dtype=torch.long)


def _pad_batch(sequences, padding_value, padding_side):
    """Pad 1-D tensors into one ``(batch, max_len)`` tensor on the given side."""
    if padding_side == "left":
        # pad_sequence is used in its right-padding form here, so reverse each
        # sequence, pad, and reverse the padded batch back to get left padding.
        reversed_seqs = [seq.flip(0) for seq in sequences]
        padded = torch.nn.utils.rnn.pad_sequence(
            reversed_seqs, batch_first=True, padding_value=padding_value
        )
        return padded.flip(1)
    return torch.nn.utils.rnn.pad_sequence(
        sequences, batch_first=True, padding_value=padding_value
    )


def build_inputs(
    tokenizer, texts, padding_side="left", split_digit=True, device="cuda"
):
    """Encode a batch of texts into padded ``input_ids`` and ``attention_mask``.

    Args:
        tokenizer: Tokenizer passed through to ``encode``; must also expose
            ``pad_token_id``.
        texts: Iterable of strings to encode.
        padding_side: ``"left"`` pads at the start of each row; any other
            value pads at the end.
        split_digit: Forwarded to ``encode``.
        device: Device the returned tensors are moved to.

    Returns:
        A dict with ``"input_ids"`` and ``"attention_mask"``, both
        ``torch.long`` tensors of the same padded shape on ``device``. The
        mask is 1 at positions holding encoded tokens and 0 at padding.

    Raises:
        ValueError: If ``tokenizer.pad_token_id`` is ``None``.
    """
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        raise ValueError(
            "tokenizer.pad_token_id is None; set a pad token before batching"
        )

    sequences = [encode(tokenizer, text, split_digit) for text in texts]
    input_ids = _pad_batch(sequences, pad_id, padding_side)

    # Derive the mask from sequence lengths rather than from
    # `input_ids != pad_id`: when the pad id also occurs inside the text
    # (e.g. pad token aliased to EOS), a value comparison would wrongly mask
    # real tokens.
    ones = [torch.ones_like(seq) for seq in sequences]
    attention_mask = _pad_batch(ones, 0, padding_side).type(torch.long)

    inputs = {
        "input_ids": input_ids.to(device),
        "attention_mask": attention_mask.to(device),
    }
    return inputs
