import json, re
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction


class Example:
    """Container for one raw training/test example.

    Attributes:
        idx: Identifier of the example, stored as given.
        nl: Natural-language side of the example, stored as given.
        code: Code side of the example, stored as given.
    """

    def __init__(self, idx, nl, code):
        # Plain value holder: no validation or copying is performed.
        self.idx = idx
        self.nl = nl
        self.code = code


def read_examples(filename):
    """Read examples from a JSON-lines file.

    Every non-blank line is parsed as a JSON object and must provide the
    string fields ``intent`` and ``snippet``. Records whose ``intent`` or
    ``snippet`` is empty after stripping whitespace are dropped.

    Args:
        filename: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``Example`` objects. ``idx`` is the zero-based line number
        of the record in the file, so skipped lines leave gaps in the
        numbering.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``intent`` or ``snippet`` field.
    """
    examples = []
    with open(filename, encoding="utf-8") as f:
        for idx, line in enumerate(f):
            # Blank lines (e.g. a trailing empty line at the end of the file)
            # are not records; skip them instead of letting json.loads fail.
            if not line.strip():
                continue
            js = json.loads(line)
            if js['intent'].strip() == '' or js['snippet'].strip() == '':
                continue
            examples.append(
                Example(idx=idx, nl=js['intent'], code=js['snippet'])
            )

    return examples



class InputFeatures:
    """Model-ready features derived from a single example.

    Attributes:
        example_id: Identifier of the example the features belong to.
        input_ids: Sized sequence of model inputs (presumably token ids).
        labels: Sized sequence of targets; must be as long as ``input_ids``.
    """

    def __init__(self, example_id, input_ids, labels):
        self.example_id = example_id
        self.input_ids = input_ids
        self.labels = labels
        # Inputs and labels are expected to be aligned element by element.
        assert len(labels) == len(input_ids)


def tokenize_code(code, ignore_symbols=False):
    """Split a code string into a flat list of tokens.

    Every character outside ``[A-Za-z0-9_]`` becomes its own token, a
    lowercase letter followed by an uppercase letter is split apart
    (``myVar`` -> ``my``, ``Var``), and both quote characters are
    normalised to a backtick.

    Args:
        code: Source text to tokenize.
        ignore_symbols: When true, keep only tokens for which
            ``str.isalnum()`` holds. This also drops identifiers that
            contain an underscore.

    Returns:
        The list of non-empty tokens in order of appearance.
    """
    # Isolate each non-word character with surrounding spaces.
    spaced = re.sub(r'([^A-Za-z0-9_])', r' \1 ', code)
    # Break camelCase at every lower-to-upper transition.
    spaced = re.sub(r'([a-z])([A-Z])', r'\1 \2', spaced)
    # Collapse whitespace runs so that a single space is the only separator.
    spaced = re.sub(r'\s+', ' ', spaced)
    # Map double and single quotes to the same backtick token.
    spaced = spaced.translate(str.maketrans({'"': '`', "'": '`'}))

    pieces = list(filter(None, spaced.split(' ')))
    if not ignore_symbols:
        return pieces
    return [piece for piece in pieces if piece.isalnum()]


def clean_code(code):
    code = code.replace('```python', '')
    code = code.replace('```', '')
    return code.strip()

