import csv
import os
import pandas as pd

# Mix the full Flickr training set into the QQP training set and shuffle the result.
FLICKR_PATH = "../flickr30k/flickr-train1+2-shuffled.csv"
QQP_PATH = "../qqp/qqp_train.csv"
# Destination of the combined, shuffled dataset.
OUTPUT_PATH = './shuffled-data/full_flickr_mixed_into_QQP.csv'


# Load both datasets. The files are read without a header row, so pandas
# assigns integer column labels that are replaced with names below.
base_data = pd.read_csv(
    FLICKR_PATH,
    sep=',', quoting=csv.QUOTE_ALL, header=None)

insert_data = pd.read_csv(
    QQP_PATH,
    sep=',', quoting=csv.QUOTE_ALL, header=None)


# Give both frames identical column names so that concat stacks them row-wise
# into the same two columns. This assignment raises ValueError if either file
# does not have exactly two columns.
base_data.columns = ['A', 'B']
insert_data.columns = ['A', 'B']

# Concatenate and shuffle for random training. sample(frac=1) returns every
# row in random order; no random_state is set, so the order differs per run.
mixed_data = pd.concat([base_data, insert_data])
mixed_data = mixed_data.sample(frac=1)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

# Write without index or header, quoting every field.
mixed_data.to_csv(OUTPUT_PATH, index=False, quoting=csv.QUOTE_ALL, header=False)

