""" Get the dense/sparse features of movies using TMDB API """
import sys

sys.path.insert(0, "../")

import os
import json
import requests
import argparse
import numpy as np
import pandas as pd
from multi_threading import RunInParallel
from ganrl.commons.args import PARAMS


class TMDB:
    """ Minimal client for the TMDB v3 REST API that also turns movie metadata into feature vectors.

    Example Response: See self.get_movie_info(movie_id=tmdbid)
    {
       "original_language":"en",
       "poster_path":"/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg",
       "video":false,
       "title":"Toy Story",
       "tagline":"",
       "runtime":81,
       "overview":"Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear
                   onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when
                   circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside
                   their differences.",
       "spoken_languages":[
          {
             "english_name":"English",
             "name":"English",
             "iso_639_1":"en"
          }
       ],
       "backdrop_path":"/3Rfvhy1Nl6sSGJwyjb0QiZzZYlB.jpg",
       "genres":[
          {
             "id":16,
             "name":"Animation"
          },
          {
             "id":12,
             "name":"Adventure"
          },
          {
             "id":10751,
             "name":"Family"
          },
          {
             "id":35,
             "name":"Comedy"
          }
       ],
       "belongs_to_collection":{
          "backdrop_path":"/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg",
          "poster_path":"/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg",
          "id":10194,
          "name":"Toy Story Collection"
       },
       "status":"Released",
       "production_countries":[
          {
             "iso_3166_1":"US",
             "name":"United States of America"
          }
       ],
       "homepage":"http://toystory.disney.com/toy-story",
       "adult":false,
       "revenue":373554033,
       "budget":30000000,
       "release_date":"1995-10-30",
       "id":862,
       "production_companies":[
          {
             "logo_path":"/1TjvGVDMYsj6JBxOAkUHpPEwLf7.png",
             "origin_country":"US",
             "id":3,
             "name":"Pixar"
          }
       ],
       "imdb_id":"tt0114709",
       "original_title":"Toy Story",
       "vote_count":13611,
       "popularity":101.768,
       "vote_average":7.9
    }
    """

    # Total number of GET attempts made for one request before the last (failing) response is returned.
    _MAX_ATTEMPTS = 6

    def __init__(self, token):
        """ Store the bearer-token request headers and the API / image base URLs.

        Args:
            token: TMDB API token, sent as ``Authorization: Bearer <token>`` on every request.
        """
        self.headers_ = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json;charset=utf-8'}
        self.base_url_ = 'https://api.themoviedb.org/3/'
        self.img_base_url_ = 'https://image.tmdb.org/t/p/w500'

    def _json_by_get_request(self, url, params=None):
        """ GET ``url`` and return ``(decoded_json_body, status_code)``.

        The request is re-issued until it answers 200 or ``_MAX_ATTEMPTS`` attempts have been made; the
        last response is returned either way, so callers must check the status code themselves.

        Args:
            url: Absolute URL to query.
            params: Optional dict of query-string parameters.

        Returns:
            Tuple ``(payload, status_code)``. ``payload`` is an empty dict when a failed response carries
            a body that is not valid JSON.

        Raises:
            ValueError: If a 200 response carries a body that is not valid JSON.
        """
        for _ in range(self._MAX_ATTEMPTS):
            res = requests.get(url, headers=self.headers_, params=params)
            if res.status_code == 200:
                break
        try:
            payload = json.loads(res.text)
        except ValueError:
            # Error responses are not guaranteed to be JSON; report the failing status instead of crashing here.
            if res.status_code == 200:
                raise
            payload = {}
        return payload, res.status_code

    def search_movies(self, query):
        """ Query the ``search/movie`` endpoint; returns ``(payload, status_code)``. """
        params = {'query': query}
        url = '{}search/movie'.format(self.base_url_)
        return self._json_by_get_request(url, params)

    def get_movie_info(self, movie_id):
        """ Query the ``movie/<movie_id>`` endpoint; returns ``(payload, status_code)``. """
        url = '{}movie/{}'.format(self.base_url_, movie_id)
        return self._json_by_get_request(url)

    def get_movie_genre_list(self):
        """ https://developers.themoviedb.org/3/genres/get-movie-list

        Returns:
            The ``"genres"`` entry of the response.

        Raises:
            RuntimeError: If the request did not end with a 200 status.
        """
        # base_url_ already ends with "/", so no extra slash is inserted here.
        url = '{}genre/movie/list'.format(self.base_url_)
        res, status_code = self._json_by_get_request(url)
        if status_code != 200:
            raise RuntimeError('TMDB genre list request failed with HTTP status {}'.format(status_code))
        return res["genres"]

    def get_genre_master(self):
        """ Map every genre name to its position in the list returned by ``get_movie_genre_list``. """
        genres = self.get_movie_genre_list()
        return {genre_info["name"]: index for index, genre_info in enumerate(genres)}

    def get_movie_feature(self, movie_id, dict_genre_master, dense_cols=None):
        """ Build the sparse (genre) and dense (numeric) feature vectors of one movie.

        Args:
            movie_id: TMDB id passed to ``get_movie_info``.
            dict_genre_master: Mapping genre name -> index, e.g. the result of ``get_genre_master``.
            dense_cols: Keys of the movie payload used as dense features, in output order.
                Defaults to ``PARAMS.GET_MOVIE_FEATURES_DENSE_COL``.

        Returns:
            Tuple ``(sparse_f, dense_f, release_date)``:
              * ``sparse_f``: float array of length ``max(index) + 1`` counting the movie's genres per index.
              * ``dense_f``: float array with one entry per dense column (0.0 for missing / null values).
              * ``release_date``: the payload's ``"release_date"`` value, or None.
            When the request fails or the movie has no genres, both arrays are all zeros and
            ``release_date`` is None.

        Raises:
            KeyError: If the movie carries a genre name that is absent from ``dict_genre_master``.
        """
        if dense_cols is None:
            dense_cols = PARAMS.GET_MOVIE_FEATURES_DENSE_COL
        num_genres = max(dict_genre_master.values(), default=-1) + 1
        res, status_code = self.get_movie_info(movie_id=movie_id)

        sparse_f = np.zeros(num_genres, dtype=float)
        genres = res.get("genres") if status_code == 200 else None
        if genres:  # sometimes a movie doesn't have any associated genre
            for genre_info in genres:
                sparse_f[dict_genre_master[genre_info["name"]]] += 1.0
            # A JSON null (or an absent key) counts as 0.0 so that one incomplete record cannot abort the run.
            dense_f = [0.0 if res.get(key) is None else res[key] * 1.0 for key in dense_cols]
            release_date = res.get("release_date")
        else:
            dense_f = [0.0 for _ in dense_cols]
            release_date = None
        # The builtin float replaces the np.float alias, which no longer exists in current NumPy releases.
        return np.asarray(sparse_f, dtype=float), np.asarray(dense_f, dtype=float), release_date


if __name__ == '__main__':
    # Script entry point: download the features of every movie listed in links.csv, using several
    # workers, and write them out as three CSV files keyed by itemId.
    from functools import partial

    parser = argparse.ArgumentParser()
    parser.add_argument('--pp_num_threads', type=int, default=6, help='num of threads')
    args = parser.parse_args()

    # Get the API key: https://www.themoviedb.org/settings/api
    # Set the env var in advance!! The token is read from the environment so that the credential
    # never has to be committed together with the source.
    TOKEN = os.getenv("TOKEN")
    if not TOKEN:
        sys.exit("Set the TOKEN environment variable to your TMDB API token before running this script")
    api = TMDB(token=TOKEN)

    dict_genre_master = api.get_genre_master()
    print(dict_genre_master)

    df_links = pd.read_csv("../ml-latest-small/links.csv")
    # Split the row positions (not the DataFrame itself) into one nearly equal chunk per worker.
    row_chunks = np.array_split(np.arange(len(df_links)), args.pp_num_threads)
    list_df_links = [df_links.iloc[rows] for rows in row_chunks]


    def _fn(_df):
        """ Fetch the features of every row in one chunk; returns three dicts keyed by itemId. """
        dict_sparse_f, dict_dense_f, dict_release_date = dict(), dict(), dict()
        for _, row in _df.iterrows():
            item_id = int(row["itemId"])
            tmdb_id = row["tmdbId"]
            # iterrows() does not preserve dtypes, so an integer id can arrive as a float (862.0) and
            # would then be formatted into the request URL as "862.0". A missing id is passed on as-is
            # and simply yields the all-zero fallback features.
            if pd.notna(tmdb_id):
                tmdb_id = int(tmdb_id)
            sparse_f, dense_f, release_date = api.get_movie_feature(movie_id=tmdb_id,
                                                                    dict_genre_master=dict_genre_master)
            dict_sparse_f[item_id] = sparse_f
            dict_dense_f[item_id] = dense_f
            dict_release_date[item_id] = release_date
        return dict_sparse_f, dict_dense_f, dict_release_date


    def _as_frame(mapping, value_col):
        """ Turn an ``{itemId: value}`` dict into a two-column DataFrame. """
        return pd.DataFrame(list(mapping.items()), columns=["itemId", value_col])


    fns = [partial(_fn, _df=chunk) for chunk in list_df_links]
    result = RunInParallel(fns=fns)

    # Start from empty frames so that an empty result still produces (header-only) output files.
    df_sparse = _as_frame(dict(), "sparse_f")
    df_dense = _as_frame(dict(), "dense_f")
    df_release_date = _as_frame(dict(), "release_date")
    sparse_parts, dense_parts, release_date_parts = list(), list(), list()
    for k in sorted(result.keys()):
        dict_sparse_f, dict_dense_f, dict_release_date = result[k]
        sparse_parts.append(_as_frame(dict_sparse_f, "sparse_f"))
        dense_parts.append(_as_frame(dict_dense_f, "dense_f"))
        release_date_parts.append(_as_frame(dict_release_date, "release_date"))
        # Re-concatenate after every chunk so that the progress line keeps reporting cumulative shapes.
        df_sparse = pd.concat(sparse_parts, ignore_index=True)
        df_dense = pd.concat(dense_parts, ignore_index=True)
        df_release_date = pd.concat(release_date_parts, ignore_index=True)
        print(k, df_sparse.shape, df_dense.shape, df_release_date.shape)

    # NOTE(review): the input is read from "../ml-latest-small" but the outputs go to "./ml-latest-small";
    # confirm that the differing directories are intentional.
    df_sparse.to_csv("./ml-latest-small/sparse_feature.csv", index=False)
    df_dense.to_csv("./ml-latest-small/dense_feature.csv", index=False)
    df_release_date.to_csv("./ml-latest-small/release_date.csv", index=False)
