import sys
from .custom_encoders import *
from .lpg_dataset import LPGDataset


class TwitterTrollsDataset(LPGDataset):
	"""Dataset configuration for the Twitter-Trolls graph.

	The class declares, per node property, which encoder turns the raw
	property values into features, names the raw CSV files it expects,
	and refuses to run when those files are missing (there is no
	automatic download).

	How ``node_encoding_functions`` / ``edge_encoding_functions`` are
	consumed is defined by ``LPGDataset`` and is not visible here.
	"""

	# Maps a node property name to a callable that receives the vertices
	# and delegates to one of the encoders from ``custom_encoders`` for
	# that property.
	# NOTE(review): "name", "screenname" and "userkey" look like
	# near-unique identifiers; category-encoding them may produce very wide
	# features — confirm this is intended.
	node_encoding_functions = {
		"labels": lambda vertices: category_encoding(vertices, "labels"),
		# Free-text fields keep only the 100 most frequent entries.
		"description": lambda vertices: text_encoding_onehot(vertices, "description", keep_most_frequent=100),
		"favouritescount": lambda vertices: integer_encoding(vertices, "favouritescount"),
		"followerscount": lambda vertices: integer_encoding(vertices, "followerscount"),
		"friendscount": lambda vertices: integer_encoding(vertices, "friendscount"),
		"lang": lambda vertices: category_encoding(vertices, "lang"),
		"listedcount": lambda vertices: integer_encoding(vertices, "listedcount"),
		"location": lambda vertices: category_encoding(vertices, "location"),
		"name": lambda vertices: category_encoding(vertices, "name"),
		"screenname": lambda vertices: category_encoding(vertices, "screenname"),
		"statusescount": lambda vertices: integer_encoding(vertices, "statusescount"),
		"userkey": lambda vertices: category_encoding(vertices, "userkey"),
		"verified": lambda vertices: bool_encoding(vertices, "verified"),
		"timezone": lambda vertices: category_encoding(vertices, "timezone"),
		"text": lambda vertices: text_encoding_onehot(vertices, "text", keep_most_frequent=100),
		"favoritecount": lambda vertices: integer_encoding(vertices, "favoritecount"),
		"retweetcount": lambda vertices: integer_encoding(vertices, "retweetcount"),
		"retweeted": lambda vertices: bool_encoding(vertices, "retweeted"),
		# NOTE(review): a URL-like property is bool-encoded — presumably a
		# presence flag; confirm against bool_encoding's semantics.
		"expandedurl": lambda vertices: bool_encoding(vertices, "expandedurl"),
		"tag": lambda vertices: category_encoding(vertices, "tag"),
	}

	# No edge properties are encoded for this dataset.
	edge_encoding_functions = {}

	@property
	def raw_file_names(self):
		"""Return the names of the raw vertex and edge CSV files."""
		return ['twitter-trolls_vertices.csv', 'twitter-trolls_edges.csv']

	def download(self):
		"""Abort the program: this dataset cannot be downloaded automatically.

		Presumably invoked by the base class when the raw files are
		missing — confirm against ``LPGDataset``.

		Raises:
			SystemExit: always, with exit status 1.
		"""
		# Report on stderr and exit non-zero so shells, CI jobs and pipeline
		# runners see the failure; a bare sys.exit() would report success.
		print("ERROR: Twitter-Trolls dataset is not present in raw folder", file=sys.stderr)
		sys.exit(1)
