"""
This script requires installation of: gseapy
"""

import os
import sys
import json
import pickle
import itertools
from datetime import datetime
from collections import defaultdict

import numpy as np
import pandas as pd
import gseapy as gp

import warnings
# Suppress every warning for the remainder of the run. The filter is installed
# after the imports above have executed, so it applies from this point onwards.
warnings.filterwarnings("ignore")


def main():
    """Run enrichment for every configured gene list and pickle the results.

    Steps:
      1. Merge the gene-set dictionaries from the ``data/kg/*_gsea.json`` files.
      2. Build the background gene list as the sorted union of ``gene_names``
         and ``cond_names`` from the background JSON file.
      3. Call ``gp.enrich`` once per gene list, grouped by task.
      4. Pickle ``{task: [result table or None, ...]}`` to ``fp_out``.

    A gene list whose enrichment fails is printed and stored as ``None`` so
    each task's results stay aligned with its input lists.

    Raises:
        ValueError: if the ``fp_background`` or ``fp_out`` placeholder below
            has not been filled in. Checked before any work is done so a
            long run is never lost at the final write step.
    """
    np.random.seed(0)

    # !!! replace these with your own inputs: each value is iterated, and
    # every item is passed to gp.enrich as one gene list
    gene_lists = {
        "pert": "",
        "gene": "",
    }
    # !!! replace this with your list of background genes
    fp_background = ""
    # !!! replace this with the path the pickled results are written to
    fp_out = ""

    unset = [
        name
        for name, value in (("fp_background", fp_background), ("fp_out", fp_out))
        if not value
    ]
    if unset:
        raise ValueError(
            "fill in the placeholder path(s) in main() before running: "
            + ", ".join(unset)
        )

    # Later files override earlier ones on duplicate gene-set names.
    go_gsea = {}
    fp_kgs = [
        "data/kg/go_gsea.json",
        "data/kg/corum_gsea.json",
        "data/kg/reactome_gsea.json",
    ]
    for fp in fp_kgs:
        with open(fp, encoding="utf-8") as f:
            go_gsea.update(json.load(f))

    with open(fp_background, encoding="utf-8") as f:
        background = json.load(f)
    # Sorted so the background passed to gseapy has a deterministic order.
    all_genes = sorted(
        set(background["gene_names"]).union(background["cond_names"])
    )
    print(all_genes[:5], len(all_genes))

    # loop through each task and each of its gene lists
    task_to_dfs = {}
    for task, lists in gene_lists.items():
        cur_dfs = []
        for gene_list in lists:
            try:
                df = gp.enrich(gene_list=gene_list,
                               gene_sets=go_gsea,
                               background=all_genes,
                               outdir=None).res2d
            except Exception as err:
                # Best effort: keep going, but report which list failed and
                # why. KeyboardInterrupt/SystemExit are left to propagate.
                print(gene_list)
                print(f"enrichment failed: {err!r}", file=sys.stderr)
                df = None
            cur_dfs.append(df)
        task_to_dfs[task] = cur_dfs

    with open(fp_out, "wb") as f:
        pickle.dump(task_to_dfs, f)


# Run the enrichment pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()

