import qrcode
import numpy as np
import nltk
from nltk.corpus import words
import random
import pandas as pd
import os
from datetime import datetime
from utils import change_box_size, convert_to_image


# --- Generation settings -------------------------------------------------
# `version` is the symbol version the dataset is pinned to: the encoding loop
# only keeps entries whose fitted version still equals this value.
version = 3
error_correction = qrcode.constants.ERROR_CORRECT_L
# When `box_size` is greater than 1 the loop rescales the flattened modules
# through `change_box_size`.
box_size = 1
border = 0
mask_pattern = 0
# Output directory name encodes the version and mask pattern of this run.
data_dir = f'data_domain_ver{version}_mask{mask_pattern}_formatinfo'

# Encoder shared by the whole run, built from the settings above.
qr_settings = {
    'version': version,
    'error_correction': error_correction,
    'box_size': box_size,
    'border': border,
    'mask_pattern': mask_pattern,
}
qr = qrcode.QRCode(**qr_settings)

# Active word source: the `domain` column of ./data/top-1m.csv.
df = pd.read_csv('./data/top-1m.csv')
all_words = list(df['domain'].values)

# Alternative sources (disabled); swap in for the two lines above:
#   - NLTK English word list, de-duplicated and sorted:
#       nltk.download('words')
#       all_words = sorted(set(words.words()))
#   - Synthetic strings:
#       df = pd.read_csv('./data/varied_fake_strings.csv')
#       all_words = list(df['domain'].values)

# Number of entries the encoding loop iterates over.
num_data = len(all_words)

# Optional shuffled visiting order (disabled):
#   word_index = random.sample(range(num_data), num_data)
#   random_index = random.sample(range(len(all_words)), num_data)

# Ensure the output directory and its `sample` subdirectory both exist.
# `exist_ok=True` also covers the case where `data_dir` is already present
# but `sample` is missing (e.g. after an interrupted or cleaned-up run), which
# would otherwise make the sample image save fail later on.
newly_created = not os.path.exists(data_dir)
os.makedirs(f'{data_dir}/sample', exist_ok=True)
if newly_created:
    print(f"created {data_dir} directory.")

def _new_encoder():
    """Return a QRCode built from the module-level generation settings."""
    return qrcode.QRCode(
        version=version,
        error_correction=error_correction,
        box_size=box_size,
        border=border,
        mask_pattern=mask_pattern,
    )


# Parallel lists: input_texts[k] is the flattened 0/1 module string of the
# symbol that encodes target_texts[k].
input_texts = []
target_texts = []

# Slice by `num_data` so the loop still honours that setting.
for i, content in enumerate(all_words[:num_data]):

    # fit=True lets the encoder grow past the pinned version. If that happened
    # on the previous entry, rebuild it so this entry starts from `version`.
    if qr.version != version:
        qr = _new_encoder()

    qr.add_data(content)
    qr.make(fit=True)

    if qr.version == version:
        # Serialise only entries that are kept: flatten the module matrix to
        # a 1-D 0/1 array, optionally rescale it, then join into one string.
        data = np.array(qr.modules).astype(int).flatten()
        if box_size > 1:
            data = change_box_size(data, box_size)
        input_texts.append("".join(map(str, data)))
        target_texts.append(content)
    else:
        # The entry did not fit the pinned version: report it and leave it
        # out of the dataset.
        print(qr.version, content)

    # Save an image for the first 100 source entries. The index is the source
    # position, so entries rejected above are still rendered here.
    if i < 100:
        qr.make_image(fill_color="black", back_color="white").save(f'./{data_dir}/sample/{i}.png')

    # Drop this entry's data before encoding the next one.
    qr.clear()

# Persist the dataset: one row per kept entry, with the encoded text in
# `target` and its flattened module string in `input`.
data = dict(target=target_texts, input=input_texts)
df = pd.DataFrame(data)
dataset_path = f'./{data_dir}/dataset.csv'
df.to_csv(dataset_path, index=False)

# Record the parameters of this run next to the dataset, one "name: value"
# line per setting, in the order listed here.
run_settings = {
    'date and time': datetime.now(),
    'num_data': len(df),
    'version': version,
    'error_correction': error_correction,
    'box_size': box_size,
    'border': border,
    'mask_pattern': mask_pattern,
    'data_dir': data_dir,
}
with open(f'./{data_dir}/setting.txt', 'w') as f:
    f.writelines(f'{label}: {value}\n' for label, value in run_settings.items())
