def events(path="Crowdflower_Comparisons/EventTime/events.txt"):
    """Aggregate crowd votes on event pairs into majority labels and vote counts.

    The file at *path* is read as comma-separated text whose first line is a
    header. For every later row the last two fields are parsed as integer
    node ids, field 6 is the "tainted" flag and field 15 is the vote
    ("category1" or "category2"). Rows whose flag is not the string "false"
    are ignored, and rows that cannot be parsed (too few fields, non-integer
    ids) are skipped on a best-effort basis.

    Args:
        path: Location of the votes file. Defaults to the path that used to
            be hard-coded, so existing zero-argument callers are unaffected.

    Returns:
        A ``(results, Matrix)`` tuple.

        ``results`` maps each ``(first_node, second_node)`` pair to
        "category1" when category1 votes strictly outnumber category2 votes
        for that pair, and to "category2" otherwise (ties included).

        ``Matrix`` maps zero-based index pairs to vote counts: a category1
        vote on ``(a, b)`` increments ``(a - 1, b - 1)``, a category2 vote
        increments ``(b - 1, a - 1)``. Pairs that received no vote in a
        given direction are absent from the dict.
    """
    votes_by_pair = {}

    # The context manager closes the file even if reading fails part-way.
    with open(path) as handle:
        next(handle, None)  # discard the header row
        for line in handle:
            fields = line.strip().split(",")
            try:
                first_node = int(fields[-2])
                second_node = int(fields[-1])
                tainted = fields[6]
                category = fields[15]
            except (ValueError, IndexError):
                # Malformed row: keep the original best-effort behaviour and
                # skip it instead of aborting the whole load.
                continue
            if tainted != "false":
                continue
            votes_by_pair.setdefault((first_node, second_node), []).append(category)

    results = {}
    Matrix = {}
    for (first_node, second_node), votes in votes_by_pair.items():
        forward = (first_node - 1, second_node - 1)
        backward = (second_node - 1, first_node - 1)

        # The counters must live outside the vote loop: resetting them for
        # every vote would make the comparison below depend only on the last
        # vote instead of on the majority.
        cate1_count = 0
        cate2_count = 0
        for vote in votes:
            if vote == "category1":
                cate1_count += 1
                Matrix[forward] = Matrix.get(forward, 0) + 1
            elif vote == "category2":
                cate2_count += 1
                Matrix[backward] = Matrix.get(backward, 0) + 1

        results[(first_node, second_node)] = (
            "category1" if cate1_count > cate2_count else "category2"
        )

    return results, Matrix


def pairwise_preference():
    """Turn the majority labels from ``events()`` into directed, zero-based edges.

    Returns:
        A ``(sole_nodes, relations, Matrix)`` tuple.

        ``relations`` holds one edge per labelled pair: ``(a - 1, b - 1)``
        when the pair ``(a, b)`` is labelled "category1", otherwise
        ``(b - 1, a - 1)``.

        ``sole_nodes`` is ``list(range(k))`` where ``k`` is the number of
        distinct edge *sources* in ``relations``.
        NOTE(review): nodes that only ever appear as an edge target are not
        counted here -- confirm that this is intended.

        ``Matrix`` is passed through unchanged from ``events()``.
    """
    results, Matrix = events()

    relations = [
        (first - 1, second - 1) if label == "category1" else (second - 1, first - 1)
        for (first, second), label in results.items()
    ]

    distinct_sources = {source for source, _ in relations}
    sole_nodes = list(range(len(distinct_sources)))

    return sole_nodes, relations, Matrix


def filter_events(rank_start=160, rank_stop=260, max_backward_gap=80):
    """Select a slice of the most-connected event nodes and re-index them.

    Nodes are ranked by how many "forward" edges (source < target) from
    ``pairwise_preference()`` touch them, highest first; ties keep the order
    in which the nodes were first encountered. The nodes at rank positions
    ``rank_start:rank_stop`` are kept, sorted, and renumbered ``0..k-1``.
    Edges and vote-matrix entries with both endpoints in the kept set are
    translated to the new numbering.

    Args:
        rank_start: First rank position (inclusive) of the kept slice.
        rank_stop: Last rank position (exclusive) of the kept slice.
        max_backward_gap: A "backward" edge (source > target) is kept only
            when ``source - target`` is strictly below this value; forward
            and self edges are always kept.

    Returns:
        ``(used_nodes, used_edges, used_Matrix)`` in the new numbering.

    Side effects:
        Prints the node list, the fraction of kept edges whose source index
        is below the target index, and the number of kept edges.
    """
    _, ini_edges, ini_Matrix = pairwise_preference()

    # Count, per node, how many forward edges touch it. The dict keeps
    # insertion order, which is what breaks ties in the ranking below.
    degree = {}
    for source, target in ini_edges:
        if source < target:
            degree[source] = degree.get(source, 0) + 1
            degree[target] = degree.get(target, 0) + 1

    # sorted() is stable, also with reverse=True, so equal degrees stay in
    # first-seen order.
    ranked = sorted(degree, key=degree.get, reverse=True)
    final_nodes = sorted(ranked[rank_start:rank_stop])

    # The dict doubles as an O(1) membership test for the kept nodes.
    mapping = {node: index for index, node in enumerate(final_nodes)}

    used_edges = []
    for source, target in ini_edges:
        if source not in mapping or target not in mapping:
            continue
        if source > target and source - target >= max_backward_gap:
            continue  # long-range backward edge: dropped
        used_edges.append((mapping[source], mapping[target]))

    used_Matrix = {
        (mapping[row], mapping[col]): value
        for (row, col), value in ini_Matrix.items()
        if row in mapping and col in mapping
    }

    used_nodes = list(range(len(final_nodes)))
    print(used_nodes)

    count = len(used_edges)
    correct = sum(1 for source, target in used_edges if source < target)

    # Guard the ratio: when no edge survives the filter the original division
    # raised ZeroDivisionError.
    print(correct / float(count) if count else 0.0)
    print(len(used_edges))

    return used_nodes, used_edges, used_Matrix


# Module-level call: the events pipeline (file read plus the prints inside
# filter_events) runs whenever this file is executed or imported.
# NOTE(review): this call sits outside the `if __name__ == "__main__":` guard
# further down -- confirm that running it at import time is intended.
filter_events()


def statue_process_original(
    path="/Users/frank/EXP/CrowdDataset/statue/statue_full.txt",
):
    """Build statue edges using only the FIRST vote recorded for each pair.

    The file at *path* is whitespace-separated with a header line. Each data
    line is read as ``<category> <nodeA> <nodeB> ...`` with integer node ids.
    Blank lines are skipped.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(nodes, edges)`` where ``nodes`` is ``list(range(100))`` and
        ``edges`` holds one zero-based edge per distinct ``(nodeA, nodeB)``
        pair, in first-seen order: ``(nodeB - 1, nodeA - 1)`` when the first
        vote is "category1", otherwise ``(nodeA - 1, nodeB - 1)``.

    Raises:
        IndexError / ValueError: on a non-blank line with fewer than three
            fields or non-integer node ids.
    """
    nodes = list(range(100))

    # Only the first vote per pair matters here, so later votes are not kept.
    first_vote = {}
    with open(path) as handle:
        next(handle, None)  # discard the header row
        for line in handle:
            fields = line.strip().split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            pair = (int(fields[1]), int(fields[2]))
            first_vote.setdefault(pair, fields[0])

    edges = [
        (node_b - 1, node_a - 1) if category == "category1" else (node_a - 1, node_b - 1)
        for (node_a, node_b), category in first_vote.items()
    ]

    return nodes, edges


def statue_process(path="/Users/frank/EXP/CrowdDataset/statue/statue_full.txt"):
    """Build majority-vote statue edges and a per-direction vote matrix.

    The file at *path* is whitespace-separated with a header line. Each data
    line is read as ``<category> <nodeA> <nodeB> ...`` with integer node ids.
    Blank lines are skipped.

    NOTE: this file defines another ``statue_process`` further down; once the
    module has been fully loaded that later definition is the one bound to
    the name.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(nodes, edges, Matrix)``.

        ``nodes`` is ``list(range(100))``.

        ``Matrix`` has an entry for every ``(i, j)`` with ``i, j`` in
        ``nodes``; a "category1" vote on ``(a, b)`` increments
        ``(b - 1, a - 1)`` and a "category2" vote increments
        ``(a - 1, b - 1)``.

        ``edges`` holds one edge per distinct pair in first-seen order:
        ``(b - 1, a - 1)`` when category1 votes strictly outnumber category2
        votes, otherwise ``(a - 1, b - 1)``.

    Raises:
        IndexError / ValueError: on a malformed non-blank line.
        KeyError: when a voted pair maps outside the 100 x 100 matrix.
    """
    nodes = list(range(100))
    Matrix = {(row, col): 0 for row in nodes for col in nodes}

    history_record = {}
    with open(path) as handle:
        next(handle, None)  # discard the header row
        for line in handle:
            fields = line.strip().split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            pair = (int(fields[1]), int(fields[2]))
            history_record.setdefault(pair, []).append(fields[0])

    edges = []
    for (node_a, node_b), votes in history_record.items():
        forward = (node_a - 1, node_b - 1)
        backward = (node_b - 1, node_a - 1)
        count_category1 = 0
        count_category2 = 0
        for vote in votes:
            if vote == "category1":
                count_category1 += 1
                Matrix[backward] += 1
            elif vote == "category2":
                count_category2 += 1
                Matrix[forward] += 1
        # Ties fall to the category2 direction.
        edges.append(backward if count_category1 > count_category2 else forward)

    return nodes, edges, Matrix


if __name__ == "__main__":
    # Script entry point: load the statue dataset and print the fraction of
    # edges whose source index is smaller than their target index.
    nodes, edges, Matrix = statue_process()
    count = len(edges)
    correct = sum(1 for edge in edges if edge[0] < edge[1])
    # Guard the ratio: an empty edge list used to raise ZeroDivisionError.
    print(correct / float(count) if count else 0.0)


def portrait_process(path="/Users/frank/EXP/CrowdDataset/portrait/fulldataset.txt"):
    """Load portrait comparison votes and derive majority edges plus a vote matrix.

    The file at *path* is split on commas and its first line is treated as a
    header. For each data row the vote is read from column 15 and the two
    item ids from columns 18 and 19. When the comment column (14) is
    non-empty, those values are read relative to the END of the row instead
    (-7, -4, -3), so that extra commas inside the comment do not shift them.
    Blank lines are skipped.

    Item ids are sorted numerically and renumbered ``0..n-1``.

    Args:
        path: Location of the dataset file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(nodes, directed_edges, M)``.

        ``nodes`` is ``list(range(n))``.

        ``directed_edges`` holds one edge per distinct ``(id1, id2)`` pair in
        first-seen order: ``(idx2, idx1)`` when "category1" votes strictly
        outnumber "category2" votes, otherwise ``(idx1, idx2)``.

        ``M`` has an entry for every ``(i, j)`` with ``i, j`` in ``nodes``.
        Entries on a majority edge hold the number of votes cast in that
        direction ("category1" counts towards ``(idx2, idx1)``, "category2"
        towards ``(idx1, idx2)``); every other entry is 0, including minority
        directions that did receive votes.

    Raises:
        IndexError: on a non-blank data row with fewer than 22 columns.
        ValueError: when an item id is not an integer literal.
        KeyError: when an id's text differs from ``str(int(id))`` (for
            example a leading zero), because the renumbering is keyed on the
            normalised text.
    """
    votes_by_pair = {}
    item_ids = set()

    with open(path) as handle:
        next(handle, None)  # discard the header row
        for line_number, line in enumerate(handle, start=2):
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines, e.g. at the end of the file
            row = stripped.split(",")
            if len(row) < 22:
                raise IndexError(
                    "line %d: expected at least 22 columns, got %d"
                    % (line_number, len(row))
                )
            if row[14]:
                # A comment is present and may itself contain commas, so
                # address the trailing columns from the end of the row.
                category, id1, id2 = row[-7], row[-4], row[-3]
            else:
                category, id1, id2 = row[15], row[18], row[19]
            item_ids.add(id1)
            item_ids.add(id2)
            votes_by_pair.setdefault((id1, id2), []).append(category)

    ordered_ids = sorted(int(item) for item in item_ids)
    mapping = {str(value): index for index, value in enumerate(ordered_ids)}

    M = {}
    directed_edges = []
    for (id1, id2), votes in votes_by_pair.items():
        forward = (mapping[id1], mapping[id2])
        backward = (mapping[id2], mapping[id1])
        count_category1 = 0
        count_category2 = 0
        for vote in votes:
            if vote == "category1":
                count_category1 += 1
                M[backward] = M.get(backward, 0) + 1
            if vote == "category2":
                count_category2 += 1
                M[forward] = M.get(forward, 0) + 1
        # Ties fall to the category2 direction.
        directed_edges.append(
            backward if count_category1 > count_category2 else forward
        )

    # Zero every cell that is not a majority edge. The original drew a
    # full-length random.sample of the edges and tested list membership,
    # which is the same set of edges at O(E) per lookup; a set gives the
    # same result in O(1) and does not touch the global random state.
    majority_edges = set(directed_edges)
    size = len(ordered_ids)
    for row_index in range(size):
        for col_index in range(size):
            if (row_index, col_index) not in majority_edges:
                M[(row_index, col_index)] = 0

    return list(range(size)), directed_edges, M


def statue_process(
    path="/Users/frank/Documents/TOPKEXP/DATASET/statue_completed/statue_full.txt",
):
    """Majority-label every statue pair found in a whitespace-separated file.

    Each non-blank line is read as ``<category> <field> <nodeA> <nodeB> ...``:
    the vote is field 0 and the two node ids are fields 2 and 3, kept as
    strings. No header line is skipped.

    NOTE: this definition reuses the name of an earlier ``statue_process`` in
    the same file and therefore replaces it once the module has been fully
    loaded.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(sole_nodes, result)`` where ``sole_nodes`` lists the distinct node
        ids in first-seen order and ``result`` maps each ``(nodeA, nodeB)``
        pair to "category1" when category1 votes strictly outnumber
        category2 votes, else "category2".

    Raises:
        IndexError: on a non-blank line with fewer than four fields.
    """
    dataset = {}
    sole_nodes = []
    seen_nodes = set()  # O(1) companion to the ordered sole_nodes list

    with open(path) as handle:
        for line in handle:
            fields = line.strip().split()
            if not fields:
                continue  # tolerate blank lines, e.g. at the end of the file
            node_a, node_b = fields[2], fields[3]
            for node in (node_a, node_b):
                if node not in seen_nodes:
                    seen_nodes.add(node)
                    sole_nodes.append(node)
            # Record EVERY vote. The original appended only when the pair was
            # first seen, so the majority count below saw a single vote.
            dataset.setdefault((node_a, node_b), []).append(fields[0])

    result = {}
    for pair, votes in dataset.items():
        category1_count = votes.count("category1")
        category2_count = votes.count("category2")
        result[pair] = (
            "category1" if category1_count > category2_count else "category2"
        )

    return sole_nodes, result


def last50(path="/Users/frank/EXP/CrowdDataset/portraitLast50/portraitlast50.txt"):
    """Load the "last 50" portrait votes as majority edges and a vote matrix.

    The file at *path* is comma-separated. The header's fields are printed
    as ``(index, name)`` tuples. In each data row the vote is the fifth
    field from the end and the two image URLs are the last two fields; the
    integer item id is the text between "men/files/m-" and the next "." in
    each URL. Ids are expected in 51..100 and are renumbered to 0..49.
    Blank lines are skipped; rows whose vote is neither "category1" nor
    "category2" are ignored.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(used_nodes, used_edges, used_Matrix)`` in the 0..49 numbering.

        ``used_Matrix`` has an entry for every index pair; a "category1"
        vote on ``(a, b)`` increments ``(a, b)`` and a "category2" vote
        increments ``(b, a)``.

        ``used_edges`` holds one edge per distinct ``(a, b)`` pair in
        first-seen order: ``(a, b)`` when category1 votes strictly outnumber
        category2 votes, otherwise ``(b, a)``.

    Raises:
        IndexError / ValueError: on a malformed non-blank data row.
        KeyError: when a voted item id lies outside 51..100.
    """
    nodes = list(range(51, 101))
    Matrix = {(row, col): 0 for row in nodes for col in nodes}
    history = {}

    # The context manager closes the file even when a row fails to parse.
    with open(path) as handle:
        header = next(handle, None)
        if header is not None:
            for item in enumerate(header.strip().split(",")):
                print(item)
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = stripped.split(",")
            first = int(fields[-2].split("men/files/m-")[1].split(".")[0])
            second = int(fields[-1].split("men/files/m-")[1].split(".")[0])
            category = fields[-5]

            if category == "category1":
                counted = (first, second)
            elif category == "category2":
                counted = (second, first)
            else:
                continue
            history.setdefault((first, second), []).append(category)
            Matrix[counted] += 1

    edges = []
    for (first, second), votes in history.items():
        category_count1 = votes.count("category1")
        category_count2 = votes.count("category2")
        # Ties fall to the category2 direction.
        if category_count1 > category_count2:
            edges.append((first, second))
        else:
            edges.append((second, first))

    mapping = {node: index for index, node in enumerate(nodes)}
    used_edges = [(mapping[source], mapping[target]) for source, target in edges]
    used_Matrix = {
        (mapping[row], mapping[col]): value for (row, col), value in Matrix.items()
    }
    used_nodes = list(range(len(nodes)))

    return used_nodes, used_edges, used_Matrix


def last50_original(
    path="/Users/frank/EXP/CrowdDataset/portraitLast50/portraitlast50.txt",
):
    """Load the "last 50" portrait edges using only the FIRST vote per pair.

    The file at *path* is comma-separated. The header's fields are printed
    as ``(index, name)`` tuples. In each data row the vote is the fifth
    field from the end and the two image URLs are the last two fields; the
    integer item id is the text between "men/files/m-" and the next "." in
    each URL. Ids are expected in 51..100 and are renumbered to 0..49.
    Blank lines are skipped; rows whose vote is neither "category1" nor
    "category2" are ignored.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(used_nodes, used_edges)`` in the 0..49 numbering. ``used_edges``
        holds one edge per distinct ``(a, b)`` pair in first-seen order:
        ``(a, b)`` when the first recorded vote is "category1", otherwise
        ``(b, a)``.

    Raises:
        IndexError / ValueError: on a malformed non-blank data row.
        KeyError: when a voted item id lies outside 51..100.
    """
    nodes = list(range(51, 101))

    # Only the first category1/category2 vote per pair decides the edge, so
    # later votes (and the vote matrix, which was never returned) are not kept.
    first_vote = {}
    with open(path) as handle:
        header = next(handle, None)
        if header is not None:
            for item in enumerate(header.strip().split(",")):
                print(item)
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = stripped.split(",")
            first = int(fields[-2].split("men/files/m-")[1].split(".")[0])
            second = int(fields[-1].split("men/files/m-")[1].split(".")[0])
            category = fields[-5]
            if category in ("category1", "category2"):
                first_vote.setdefault((first, second), category)

    mapping = {node: index for index, node in enumerate(nodes)}
    used_edges = []
    for (first, second), category in first_vote.items():
        if category == "category1":
            source, target = first, second
        else:
            source, target = second, first
        # The lookup raises KeyError for ids outside 51..100.
        used_edges.append((mapping[source], mapping[target]))

    used_nodes = list(range(len(nodes)))
    return used_nodes, used_edges


def mall_process(
    path="/Users/frank/EXP/CrowdDataset/mall_dataset 2/crowdflower/mall.txt",
):
    """Load the mall comparison votes as majority edges and a vote matrix.

    The file at *path* is comma-separated. The header's fields are printed
    as ``(index, name)`` tuples. In each data row the vote is the fifth
    field from the end and the two image URLs are the last two fields; the
    integer item id is the text between "mall_dataset/" and the next "." in
    each URL. Ids are expected in 0..38. Blank lines are skipped; rows whose
    vote is neither "category1" nor "category2" are ignored.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(nodes, edges, Matrix)``.

        ``nodes`` is ``list(range(39))``.

        ``Matrix`` has an entry for every ``(i, j)`` with ``i, j`` in
        ``nodes``; a "category1" vote on ``(a, b)`` increments ``(a, b)``
        and a "category2" vote increments ``(b, a)``.

        ``edges`` holds one edge per distinct ``(a, b)`` pair in first-seen
        order: ``(a, b)`` when category1 votes strictly outnumber category2
        votes, otherwise ``(b, a)``.

    Raises:
        IndexError / ValueError: on a malformed non-blank data row.
        KeyError: when a voted item id lies outside 0..38.
    """
    nodes = list(range(39))
    Matrix = {(row, col): 0 for row in nodes for col in nodes}
    history = {}

    # The context manager closes the file even when a row fails to parse.
    with open(path) as handle:
        header = next(handle, None)
        if header is not None:
            for item in enumerate(header.strip().split(",")):
                print(item)
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = stripped.split(",")
            first = int(fields[-2].split("mall_dataset/")[1].split(".")[0])
            second = int(fields[-1].split("mall_dataset/")[1].split(".")[0])
            category = fields[-5]

            if category == "category1":
                counted = (first, second)
            elif category == "category2":
                counted = (second, first)
            else:
                continue
            history.setdefault((first, second), []).append(category)
            Matrix[counted] += 1

    edges = []
    for (first, second), votes in history.items():
        category_count1 = votes.count("category1")
        category_count2 = votes.count("category2")
        # Ties fall to the category2 direction.
        if category_count1 > category_count2:
            edges.append((first, second))
        else:
            edges.append((second, first))

    return nodes, edges, Matrix


def mall_process_original(
    path="/Users/frank/EXP/CrowdDataset/mall_dataset 2/crowdflower/mall.txt",
):
    """Load the mall comparison edges using only the FIRST vote per pair.

    The file at *path* is comma-separated. The header's fields are printed
    as ``(index, name)`` tuples. In each data row the vote is the fifth
    field from the end and the two image URLs are the last two fields; the
    integer item id is the text between "mall_dataset/" and the next "." in
    each URL. Ids are expected in 0..38. Blank lines are skipped; rows whose
    vote is neither "category1" nor "category2" are ignored.

    Args:
        path: Location of the votes file. Defaults to the previously
            hard-coded path, so zero-argument callers are unaffected.

    Returns:
        ``(nodes, edges)`` where ``nodes`` is ``list(range(39))`` and
        ``edges`` holds one edge per distinct ``(a, b)`` pair in first-seen
        order: ``(a, b)`` when the first recorded vote is "category1",
        otherwise ``(b, a)``.

    Raises:
        IndexError / ValueError: on a malformed non-blank data row.
        KeyError: when a voted item id lies outside 0..38.
    """
    nodes = list(range(39))
    valid_ids = set(nodes)

    # Only the first category1/category2 vote per pair decides the edge.
    first_vote = {}
    with open(path) as handle:
        header = next(handle, None)
        if header is not None:
            for item in enumerate(header.strip().split(",")):
                print(item)
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue  # tolerate blank lines, e.g. at the end of the file
            fields = stripped.split(",")
            first = int(fields[-2].split("mall_dataset/")[1].split(".")[0])
            second = int(fields[-1].split("mall_dataset/")[1].split(".")[0])
            category = fields[-5]
            if category not in ("category1", "category2"):
                continue
            # The original rejected out-of-range ids through a lookup in a
            # vote matrix it never returned; keep the same failure mode
            # without building that matrix.
            if first not in valid_ids or second not in valid_ids:
                raise KeyError((first, second))
            first_vote.setdefault((first, second), category)

    edges = [
        (first, second) if category == "category1" else (second, first)
        for (first, second), category in first_vote.items()
    ]

    return nodes, edges
