import json
import os
import requests

from machiavelli.utils import get_conditional_group, get_nested_group, get_scenes_from_scenelist

from asteval import valid_symbol_name


class Scraper:
    """Download and pre-process the ChoiceScript scene files of one game.

    Attributes:
        url: Absolute base URL of the game (site root + the given path).
        scenes: Maps lower-cased scene name -> processed lines of that scene.
        scene_list: Scene names in declaration order, starting with "startup".
        unused_scenes: Queue of scene names discovered but not yet scraped.
        label_to_linenum: Maps "<scene>_<label>" (lower-cased) -> line index.
        metadata: "title" / "author" values found in the scene files.
        kw_vars: Variable names rejected by ``valid_symbol_name``.
        session: HTTP session used for the downloads; it must provide
            ``get(url)`` returning an object with a bytes ``content``.
    """

    def __init__(self, url, session):
        self.url = "https://www.choiceofgames.com/" + url
        self.scenes = {}
        self.scene_list = ["startup"]
        self.unused_scenes = []
        self.label_to_linenum = {}
        self.metadata = {}
        self.kw_vars = {}
        self.session = session

    def _scene_url(self, scene_name):
        """Return the absolute URL of ``scenes/<scene_name>.txt``."""
        # URLs always use "/" as separator; os.path.join would insert "\\"
        # on Windows and produce an address the server cannot resolve.
        base = self.url if self.url.endswith("/") else self.url + "/"
        return "{}scenes/{}.txt".format(base, scene_name)

    def _download_lines(self, scene_name):
        """Fetch a scene and return its non-blank lines as ``[text, indent]``.

        ``text`` is the stripped line with any byte-order mark removed and
        ``indent`` is the number of leading whitespace characters.
        """
        payload = self.session.get(self._scene_url(scene_name)).content
        entries = []
        for raw_line in payload.decode("utf-8").split("\n"):
            without_indent = raw_line.lstrip()
            if not without_indent:
                continue
            indent = len(raw_line) - len(without_indent)
            entries.append([raw_line.strip().replace('\ufeff', ''), indent])
        return entries

    def process(self, scene_name):
        """Download one scene, record its metadata and return its lines.

        Side effects on the instance:
          * ``*label`` lines are registered in ``label_to_linenum``.
          * ``*goto_scene`` / ``*gosub_scene`` targets that were not seen yet
            are queued in ``unused_scenes``.
          * ``*title`` / ``*author`` fill ``metadata``.
          * ``*scene_list`` replaces ``unused_scenes`` and extends
            ``scene_list``.
          * invalid variable names are collected in ``kw_vars``.

        Args:
            scene_name: Name of the scene file, without the ``.txt`` suffix.

        Returns:
            A list of ``[text, indent]`` pairs, one per non-blank line, with
            ``*elsif`` spelled ``*elseif`` and ``*set`` / ``*temp`` lines
            whitespace-normalised.

        Raises:
            AssertionError: If the scene consists of exactly one line, or a
                ``*set`` line cannot be split into at least three fields.
        """
        print(scene_name)
        lines = self._download_lines(scene_name)

        if len(lines) == 1:
            # Presumably a one-line body is an error page rather than a real
            # scene -- TODO confirm.  Raised explicitly so the guard also
            # fires when Python runs with -O.
            print(lines)
            raise AssertionError(
                f"scene {scene_name!r} has a single line; expected more")

        processed = []
        for i, entry in enumerate(lines):
            text = entry[0]
            lowered = text.lower()
            if lowered.startswith("*label"):
                # NOTE(review): the replace is case-sensitive while the check
                # above is not, so "*Label x" keeps its prefix -- confirm.
                name = text.replace("*label", "").strip()
                assert "*label" not in name
                label = f"{scene_name}_{name}".lower()
                self.label_to_linenum[label] = i
            elif lowered.startswith(("*goto_scene", "*gosub_scene")):
                # split() instead of split(" ") so repeated spaces or tabs do
                # not yield an empty scene name.
                maybe_new_scene = text.split()[1]
                # self.scenes is keyed by lower-cased names, so the lookup
                # has to be lower-cased too; otherwise mixed-case scenes are
                # queued again after they were already scraped.
                is_known = (
                    maybe_new_scene.lower() in self.scenes
                    or maybe_new_scene in self.unused_scenes
                    or maybe_new_scene == scene_name
                )
                # A "{" marks a computed scene name that cannot be fetched.
                if not is_known and '{' not in maybe_new_scene:
                    self.unused_scenes.append(maybe_new_scene)
            elif lowered.startswith("*title"):
                self.metadata["title"] = text.split("*title ")[1]
            elif lowered.startswith("*author"):
                self.metadata["author"] = text.split("*author ")[1]
            elif lowered.startswith("*scene_list"):
                scene_list, _ = get_nested_group(lines[i:])
                self.unused_scenes = get_scenes_from_scenelist(scene_list)
                self.scene_list += list(self.unused_scenes)
            elif lowered.startswith("*elsif"):
                entry[0] = entry[0].replace("*elsif", "*elseif")

            # Fix typos or keyword variables
            if lowered.startswith(("*create", "*set", "*temp")):
                if lowered.startswith("*set_checkpoint"):
                    processed.append(entry)
                    continue
                split = entry[0].split()
                # Fix typos: "*set var<op>value" written without spaces.
                if lowered.startswith(("*temp", "*set")):
                    if len(split) == 2:
                        for c in ['%', '+', '-', '*', '/', '"']:
                            a = split[1].split(c, 1)
                            if len(a) == 2:
                                # NOTE(review): only the quote is restored;
                                # the other separators are dropped from the
                                # rebuilt line -- confirm this is intended.
                                if c == '"':
                                    a[1] = '"' + a[1]
                                split = [split[0], a[0], a[1]]
                                break
                    assert len(split) >= 3 or text.startswith("*temp")
                    entry[0] = " ".join(split)
                # Clean variables that are keywords
                var = split[1].lower()
                if not valid_symbol_name(var) and \
                        not lowered.startswith("*setref"):
                    # Prob don't need this because we manually fix kw_vars
                    print(f'----->{var}')
                    if '{' not in var:
                        self.kw_vars[var] = None
            processed.append(entry)
        return processed

    def save_game(self, filename):
        """Write the scraped state to ``filename`` as indented JSON.

        ``session`` and ``unused_scenes`` are left out of the file.  The
        instance itself is not modified, so the scraper stays usable and the
        method can be called more than once.
        """
        print(f'Saving to {filename}')
        state = {
            key: value
            for key, value in self.__dict__.items()
            if key not in ("session", "unused_scenes")
        }
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=2)

    def scrape_game(self):
        """Scrape "startup" and then every scene queued while processing."""
        self.scenes["startup"] = self.process("startup")
        while len(self.unused_scenes) > 0:
            scene_name = self.unused_scenes.pop(0)
            self.scenes[scene_name.lower()] = self.process(scene_name)

    @staticmethod
    def _opens_summary_block(lowered_line):
        """Tell whether a lower-cased line tests the purchase-support flag.

        Comment lines never qualify, whichever spelling of the flag they
        mention.
        """
        if lowered_line.startswith("*comment"):
            return False
        return ("choice_purchase_supported" in lowered_line
                or "choice_purchased_supported" in lowered_line)

    def scrape_summary(self):
        """Extract the summary text and title from the "startup" scene.

        Text lines are collected from every conditional group that starts at
        a line mentioning ``choice_purchase_supported`` (or the
        ``choice_purchased_supported`` spelling).  Lines fully wrapped in
        ``[i]...[/i]`` are skipped, as are a few known commands; any other
        command ends the extraction.

        Returns:
            ``(summary, title)``: the collected lines joined by newlines, and
            the text after ``*title`` (``""`` when startup has no title).
        """
        scene = self.process("startup")
        title = ""  # stays empty instead of being unbound without *title
        skipped_commands = (
            "*comment", "*image", "*youtube", "*text_image",
            "*if choice_is_web", "*if choice_purchase_supported",
        )
        collected = []
        for i, entry in enumerate(scene):
            lowered = entry[0].lower()
            if lowered.startswith("*title"):
                title = entry[0].replace("*title", "").strip()
            if not self._opens_summary_block(lowered):
                continue
            group = get_conditional_group(scene[i:])[0]
            # group[0] is the opening line itself, so start after it.
            for maybe_text in group[1:]:
                content = maybe_text[0]
                if not content.startswith("*"):
                    if not (content.startswith('[i]')
                            and content.endswith('[/i]')):
                        collected.append(content)
                elif content.startswith(skipped_commands):
                    continue
                else:
                    return "\n".join(collected), title
        return "\n".join(collected), title

# ---------------------------------------------------------------------------
# Script section: log in to choiceofgames.com, then extract the summary and
# title of every game listed in the project configuration and save the
# scraped data as JSON.
# ---------------------------------------------------------------------------
from machiavelli.config import MachiavelliConfig
cfg = MachiavelliConfig()

session = requests.Session()
# Credentials are read from the environment so they never have to be written
# into the source file; alternatively replace the "" defaults below with your
# Choice of Games email and password.
EMAIL = os.environ.get("CHOICEOFGAMES_EMAIL", "")
PASSWORD = os.environ.get("CHOICEOFGAMES_PASSWORD", "")
if not EMAIL or not PASSWORD:
    raise SystemExit(
        "Set CHOICEOFGAMES_EMAIL and CHOICEOFGAMES_PASSWORD (or fill in "
        "EMAIL / PASSWORD in this script) before scraping.")
p = session.post('https://www.choiceofgames.com/api/login.php',
                 data={'email': EMAIL, 'password': PASSWORD})
if p.status_code != 200:
    # Explicit check instead of assert so it is not stripped under -O.
    raise RuntimeError(f"Login failed with HTTP status {p.status_code}")

# Make sure the output directory exists before the first save.
os.makedirs("game_data/source", exist_ok=True)

summaries = {}
for game in cfg.working_games:
    print(f"****** Extracting Adventure {game} ***********")
    scraper = Scraper(game, session)
    summary, title = scraper.scrape_summary()
    # NOTE(review): scrape_game() is never called here, so the saved file
    # has an empty "scenes" mapping -- confirm this is intended.
    # Game paths contain "/", which cannot appear in a file name.
    game_id = game.replace("/", "_")
    summaries[game_id] = (summary, title)
    scraper.save_game(f"game_data/source/{game_id}.json")
with open("game_data/game_summaries.json", 'w', encoding='utf-8') as f:
    json.dump(summaries, f, indent=2)
