from argparse import ArgumentParser
from cmath import nan
import csv
from sklearn import neighbors
import pandas as pd
import json
import pdb
import jsonlines

# Command-line interface: three input corpus paths and one output path.
# Every option is mandatory, so argparse exits with a usage error if any
# of them is missing.
parser = ArgumentParser()
for option in ('--msmarco_path', '--mesh_path', '--wiki_path', '--save_to_path'):
    parser.add_argument(option, required=True)
args = parser.parse_args()

def _load_tagged_corpus(tsv_path, source_tag):
    """Read one headerless, tab-separated corpus file and tag its rows.

    The file's columns are labelled ``d-id``, ``title`` and ``text``; a
    fourth column ``name`` holding ``source_tag`` is added to every row.
    """
    frame = pd.read_csv(
        tsv_path,
        sep='\t',
        header=None,
        names=['d-id', 'title', 'text'],
    )
    frame['name'] = source_tag
    return frame


# Load the three corpora; each one carries its own source tag.
msmarco_df = _load_tagged_corpus(args.msmarco_path, 'maarco')
mesh_df = _load_tagged_corpus(args.mesh_path, 'meesh')
wiki_df = _load_tagged_corpus(args.wiki_path, 'wiikki')

# Shift the mesh ids by the number of msmarco rows, then stack the two.
mesh_offset = len(msmarco_df)
mesh_df['d-id'] = mesh_df['d-id'] + mesh_offset
combined_df = pd.concat([msmarco_df, mesh_df])

# Shift the wiki ids by one less than the number of rows combined so far.
# NOTE(review): the "- 1" presumably compensates for wiki ids starting at 1
# rather than 0 -- confirm against the wiki input file.
wiki_offset = len(combined_df) - 1
wiki_df['d-id'] = wiki_df['d-id'] + wiki_offset
combined_df = pd.concat([combined_df, wiki_df])

# Write the merged corpus as a headerless, index-free TSV.
output_columns = ['d-id', 'title', 'text', 'name']
print("begin writing")
combined_df.to_csv(
    args.save_to_path,
    sep='\t',
    columns=output_columns,
    header=False,
    index=False,
)
print("finish combininig all grounding corpus!")