import sys
from .custom_encoders import *
from .lpg_dataset import LPGDataset


class CitationsDataset(LPGDataset):
	"""Citations graph dataset configuration on top of ``LPGDataset``.

	Declares which encoder is applied to each node and edge property and
	names the raw CSV files the dataset is built from. How these attributes
	are consumed is defined by ``LPGDataset``, which is not visible in this
	file.
	"""

	# Maps a node property name to a callable that receives the vertices and
	# forwards them, together with that property name, to an encoder.
	# The encoders are presumably provided by the star import from
	# ``custom_encoders`` -- TODO confirm.
	node_encoding_functions = {
		"labels": (lambda vertices: category_encoding(vertices, "labels")),
		"index": (lambda vertices: text_encoding_onehot(vertices, "index", keep_most_frequent=10)),
		"title": (lambda vertices: text_encoding_onehot(vertices, "title", keep_most_frequent=100)),
		"year": (lambda vertices: integer_encoding(vertices, "year")),
		"abstract": (lambda vertices: text_encoding_onehot(vertices, "abstract", keep_most_frequent=100)),
		"name": (lambda vertices: text_encoding_onehot(vertices, "name", keep_most_frequent=10)),
		"ncitation": (lambda vertices: integer_encoding(vertices, "ncitation")),
	}

	# Same idea for edges: the only encoded edge property is "type".
	edge_encoding_functions = {
		"type": (lambda edges: category_encoding(edges, "type")),
	}

	@property
	def raw_file_names(self):
		"""Return the raw file names: the vertices CSV, then the edges CSV."""
		return ['citations_vertices.csv', 'citations_edges.csv']

	def download(self):
		"""Abort the program: this dataset has no download implementation.

		Raises:
			SystemExit: always, with exit status 1.
		"""
		# Report on stderr and exit non-zero so that shells and job runners
		# see a failure; a bare sys.exit() would report success (status 0).
		print("ERROR: Citations dataset is not present in raw folder", file=sys.stderr)
		sys.exit(1)
