import json
import pickle
from collections import defaultdict, Counter

import networkx as nx
from tqdm import tqdm


# Genre label -> numeric suffix of its "genre_<n>" graph node. The 'None'
# entry is the bucket for books that have no usable genre information.
_GENRE_TO_INDEX = {
    'history, historical fiction, biography': 0,
    'children': 1,
    'romance': 2,
    'comics, graphic': 3,
    'non-fiction': 4,
    'mystery, thriller, crime': 5,
    'poetry': 6,
    'young-adult': 7,
    'fiction': 8,
    'fantasy, paranormal': 9,
    'None': 10,
}
_NO_GENRE = 'None'


def _iter_json_lines(path):
    """Yield one decoded JSON object per non-blank line of the file at ``path``."""
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # A blank (e.g. trailing) line would make json.loads raise.
            if line.strip():
                yield json.loads(line)


def main():
    """Build the user/book/genre graph from the raw dumps and pickle it.

    Reads three JSON-lines files under ``children_genre/raw`` (reviews, book
    genres, book metadata), then builds an undirected ``networkx`` graph with:

    * ``user_<i>``, ``book_<j>`` and ``genre_<k>`` nodes, where ``i``/``j`` are
      assigned in first-seen order and ``k`` comes from ``_GENRE_TO_INDEX``;
    * a user-book edge whose ``text`` attribute is the review text;
    * a book-genre edge whose ``text`` attribute is the book description.

    Books without a genre record (or with an empty one) are attached to the
    ``'None'`` genre node instead of aborting the run. The graph is written to
    ``children_genre/raw/nx_graph.pkl``.

    Raises:
        KeyError: if a book's main genre is not a key of ``_GENRE_TO_INDEX``,
            or a record lacks a required field.
    """
    # Group every review under the user who wrote it.
    reviews_by_user = defaultdict(list)
    for review in _iter_json_lines('children_genre/raw/goodreads_reviews_children.json'):
        reviews_by_user[review['user_id']].append(review)

    # For each book keep only the genre with the largest value.
    genres_by_book = {}
    for genre_info in _iter_json_lines('children_genre/raw/goodreads_book_genres_initial.json'):
        genre_scores = genre_info['genres']
        main_genre = max(genre_scores, key=genre_scores.get) if genre_scores else None
        if main_genre:
            genres_by_book[genre_info['book_id']] = main_genre

    # Book descriptions, used as the text of the book-genre edges.
    descriptions_by_book = {}
    for book in _iter_json_lines('children_genre/raw/goodreads_books_children.json'):
        descriptions_by_book[book['book_id']] = book.get('description', '')

    # Flatten to one record per review, for all users.
    final_data = []
    for user_id, reviews in reviews_by_user.items():
        for review in reviews:
            book_id = review['book_id']
            final_data.append({
                'user_id': user_id,
                'book_id': book_id,
                'review_text': review['review_text'],
                # Fall back to the 'None' bucket so a missing genre never
                # reaches the graph code as a real None.
                'genre': genres_by_book.get(book_id, _NO_GENRE),
                'description': descriptions_by_book.get(book_id, ''),
            })

    G = nx.Graph()
    user_id2idx = {}
    book_id2idx = {}

    # Add nodes and edges.
    for item in tqdm(final_data):
        # len(...) is evaluated before the insert, so each new id receives the
        # next free index and known ids keep the one they already have.
        user_idx = user_id2idx.setdefault(item['user_id'], len(user_id2idx))
        book_idx = book_id2idx.setdefault(item['book_id'], len(book_id2idx))

        user_node = "user_" + str(user_idx)
        book_node = "book_" + str(book_idx)
        genre_node = "genre_" + str(_GENRE_TO_INDEX[item['genre']])

        G.add_node(user_node, type='user', color='blue', label=user_node)
        G.add_node(book_node, type='book', color='red', label=book_node)
        G.add_node(genre_node, type='genre', color='yellow', label=genre_node)

        # Re-adding an existing edge updates its attributes, so when a user
        # reviewed the same book more than once the last review text is kept.
        G.add_edge(user_node, book_node, color='magenta', text=item['review_text'])
        G.add_edge(book_node, genre_node, color='black', text=item['description'])

    pickle_file_path = "children_genre/raw/nx_graph.pkl"
    with open(pickle_file_path, 'wb') as f:
        pickle.dump(G, f)


# Run the graph-building pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
