import json
from .probability_compare_dataset import ProbabilityCompareDataset
from ... import data_structures, utils
from typing import Optional
import os
import re
import numpy as np
from .probability_compare_dataset import ProbabilityCompareTest

# preprocessing based on https://github.com/EleutherAI/lm-evaluation-harness/blob/86319a9b14ddae2030bc6e0fdddd47fc7d0bb525/lm_eval/tasks/hellaswag/utils.py

class HellaSwag:
    """HellaSwag validation split exposed as a 4-way probability-comparison dataset.

    Every item holds the tokenized context followed by each of the four
    candidate endings. The correct ending is always stored first, the three
    distractors follow in their original order.
    """

    URL = "https://raw.githubusercontent.com/rowanz/hellaswag/master/data/hellaswag_val.jsonl"
    # Matches bracketed markers such as "[header]" or "[step]" (non-greedy, so
    # each bracket pair is removed separately).
    CLEANUP_REGEX = re.compile(r"\[.*?\]")

    # The annotation is quoted (forward reference) so it is not evaluated when
    # the class body is executed.
    def __init__(self, vocabulary: "data_structures.vocabulary.Vocabulary", cache_dir: str = "./cache") -> None:
        """Download (if needed), tokenize and index the validation set.

        Args:
            vocabulary: Tokenizer; must support ``len()`` and
                ``sentence_to_indices(str)``.
            cache_dir: Root cache directory. The data lives in a subdirectory
                named after the concrete class.
        """
        self.cache_dir = f"{cache_dir}/{self.__class__.__name__}/"
        os.makedirs(self.cache_dir, exist_ok=True)

        self.vocabulary = vocabulary
        self.dtype = self._select_dtype(len(self.vocabulary))

        self.data = []

        # The download is done while holding a lock file placed in the cache
        # directory.
        with utils.LockFile(self.cache_dir + "lock"):
            self.download()

        self.load_dataset()

        # Longest option over the whole dataset.
        self.maxlen = max(d["max_length"] for d in self.data)

    @staticmethod
    def _select_dtype(vocab_size: int):
        """Pick the NumPy integer dtype used to store token ids for *vocab_size* entries."""
        if vocab_size <= 256:
            return np.uint8
        if vocab_size < 32768:
            return np.int16
        return np.int32

    def __len__(self):
        """Return the number of loaded examples."""
        return len(self.data)

    def download(self):
        """Fetch the validation file into ``<cache_dir>data/`` unless it is already present."""
        data_dir = self.cache_dir + "data/"
        # Must be the same file name that load_dataset() opens.
        if not os.path.exists(data_dir + "hellaswag_val.jsonl"):
            os.makedirs(data_dir, exist_ok=True)
            utils.download(self.URL, data_dir, ignore_if_exists=True)

    def preprocess(self, text):
        """Clean a raw HellaSwag string.

        Strips surrounding whitespace, turns " [title]" into ". ", removes any
        remaining bracketed marker and collapses double spaces (single pass).
        """
        text = text.strip()
        # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag.
        text = text.replace(" [title]", ". ")
        text = self.CLEANUP_REGEX.sub("", text)
        text = text.replace("  ", " ")
        return text

    def load_dataset(self):
        """Read the cached JSONL file and append one tokenized example per line to ``self.data``."""
        target = "hellaswag_val.jsonl"

        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(f"{self.cache_dir}data/{target}", "r", encoding="utf-8") as f:
            for raw in f:
                record = json.loads(raw)
                label = record["label"]

                ctx = self.preprocess(
                    record["activity_label"] + ": " + record["ctx_a"] + " " + record["ctx_b"].capitalize()
                )
                ctx = self.vocabulary.sentence_to_indices(ctx)

                endings = [
                    self.vocabulary.sentence_to_indices(" " + self.preprocess(e))
                    for e in record["endings"]
                ]

                # Correct continuation first, then the distractors in file order.
                options = [ctx + endings[label]]
                options.extend(ctx + e for i, e in enumerate(endings) if i != label)

                assert len(options) == 4
                self.data.append({
                    "options": options,
                    "max_length": max(len(o) for o in options),
                    "prefix_length": len(ctx)
                })

    def __getitem__(self, idx):
        """Return example *idx* as a dict of NumPy arrays and lengths.

        Keys: ``sentence_good`` / ``good_len`` for the correct option,
        ``sentence_bad_{i}`` / ``bad_len_{i}`` for each distractor,
        ``prefix_len`` (number of context tokens), ``max_length`` (longest
        option of this example) and ``group`` (always 0).
        """
        data = self.data[idx]
        good, *bad = data["options"]

        res = {
            "sentence_good": np.array(good, dtype=self.dtype),
            "good_len": len(good),
            "prefix_len": data["prefix_length"],
            "max_length": data["max_length"],
            "group": 0
        }

        for i, d in enumerate(bad):
            res[f"sentence_bad_{i}"] = np.array(d, dtype=self.dtype)
            res[f"bad_len_{i}"] = len(d)

        return res

    def start_test(self):
        """Return a new ``ProbabilityCompareTest`` for 4 options with length normalization enabled."""
        return ProbabilityCompareTest(["val"], n_ways=4, normalize_by_length=True)



class HellaSwag1024(HellaSwag):
    """HellaSwag variant whose contexts are prefixed with ``filler_text`` and limited to 1024 tokens.

    The filler passage is prepended to every context before tokenization and
    the context is then clipped from the left so that the longest of the four
    options is at most ``CONTEXT_WINDOW`` tokens long.
    """

    # Maximum number of tokens per option.
    CONTEXT_WINDOW = 1024

    def load_dataset(self):
        """Read the cached JSONL file and append one clipped, tokenized example per line to ``self.data``."""
        target = "hellaswag_val.jsonl"
        window = self.CONTEXT_WINDOW

        # JSON is UTF-8; do not depend on the platform's default encoding.
        with open(f"{self.cache_dir}data/{target}", "r", encoding="utf-8") as f:
            for raw in f:
                record = json.loads(raw)
                label = record["label"]

                ctx = self.preprocess(
                    record["activity_label"] + ": " + record["ctx_a"] + " " + record["ctx_b"].capitalize()
                )
                ctx = self.vocabulary.sentence_to_indices(filler_text + ctx)

                endings = [
                    self.vocabulary.sentence_to_indices(" " + self.preprocess(e))
                    for e in record["endings"]
                ]

                # Clip the shared context once, from the left, leaving room for
                # the longest ending. Clipping every option separately would
                # give each option a different context length and leave
                # "prefix_length" (the unclipped length) pointing past the end
                # of the stored sequences.
                # NOTE(review): assumes the consumer treats prefix_length as
                # the index where the ending starts -- confirm against
                # ProbabilityCompareTest.
                longest_ending = max((len(e) for e in endings), default=0)
                keep = max(window - longest_ending, 0)
                ctx = ctx[max(len(ctx) - keep, 0):]

                # The extra slice only matters when an ending alone is longer
                # than the window; ctx is empty in that case.
                options = [(ctx + endings[label])[-window:]]
                options.extend((ctx + e)[-window:] for i, e in enumerate(endings) if i != label)

                assert len(options) == 4
                self.data.append({
                    "options": options,
                    "max_length": max(len(o) for o in options),
                    "prefix_length": len(ctx)
                })


# Fixed English prose passage that HellaSwag1024.load_dataset() prepends to
# every context string before tokenization. It is defined after the class that
# uses it; this works because the name is only looked up when load_dataset()
# runs. The text is concatenated directly with the context (no separator).
filler_text = """In the early hours of a misty morning, the town of Eldervale awakened slowly beneath a soft veil of fog. The ancient cobblestone streets, lined with quaint lampposts and flowering window boxes, echoed the gentle murmurs of residents beginning their daily rituals. Neighbors greeted one another with warm smiles, their voices carrying stories of days gone by. Every corner of the town whispered hints of history, from the venerable clock tower that marked the passage of time to the ivy-clad facades of old inns. In this quiet haven, nature and human endeavor coexisted harmoniously, blending modern aspirations with traditions rooted in centuries of collective memory. The air, fresh and invigorating, filled the heart with both anticipation and calm, as if the day itself promised an unfolding narrative of subtle adventures and familiar comforts. Every stone in the pavement seemed to carry a memory, and the soft chatter of early market sellers intermingled with the rustling leaves overhead. The horizon, a tapestry of pale blues and gentle oranges, hinted at the possibilities that awaited as the day advanced. Even the quiet murmur of the nearby river contributed to the serene symphony that defined the town’s character. In the heart of Eldervale, life moved with deliberate grace, as residents balanced the demands of modernity with the comfort of longstanding traditions. The market square bustled with activity, where vendors displayed an array of colorful produce and handcrafted wares. Children laughed and played near the fountain, their carefree spirits infusing the scene with vitality. Meanwhile, elders sat on benches beneath ancient oak trees, recounting tales of yesteryears with voices rich in wisdom and experience. The architecture of the town told a story of generations; weathered bricks and ornate carvings celebrated both the passage of time and the resilience of the community. 
In every conversation, there was a sense of belonging and shared destiny, as if the fabric of life was woven with threads of history, hope, and the simple joys of everyday existence. A gentle breeze carried the scent of freshly baked bread and blooming flowers, binding the senses to the charm of a life well-lived. Beyond the boundaries of the town, the countryside unfolded in a mosaic of vibrant landscapes and quiet solitude. Rolling meadows, dotted with wildflowers and ancient stone markers, stretched toward distant mountains that touched the sky. Rustic farms and winding country roads told stories of toil and perseverance, where nature’s bounty was both a gift and a challenge. In these open spaces, time appeared to slow, inviting travelers to pause and reflect on the simple elegance of existence. Birds soared on gentle breezes while the earth, rich with history and nourished by countless seasons, whispered secrets of renewal and growth. Each element of the rural scene, from the glistening dew on blades of grass to the sturdy silhouette of an old barn, contributed to a harmonious portrait of life that balanced the quiet rhythms of nature with the enduring spirit of human endeavor. Within the vibrant tapestry of urban and rural life, artistic expression blossomed as a tribute to both heritage and innovation. In cozy cafes and lively galleries, painters, poets, and musicians gathered to share their visions, infusing the air with creative energy and thoughtful dialogue. Murals on brick walls captured the essence of the town’s soul, blending abstract forms with recognizable landscapes that echoed the past. The sound of a violin, echoing softly through a narrow alley, interwove with the rhythmic pulse of footsteps and whispered conversations. Creativity was celebrated not as an isolated act but as a communal ritual, where every brushstroke and note resonated with a deep sense of purpose. 
In every artful creation, there was a fusion of tradition and modernity, a dance of colors and ideas that transcended the boundaries of time and place, inviting all who witnessed it to experience a moment of shared wonder. As the day progressed, the interplay of light and shadow transformed the landscape into a living canvas. In sun-dappled courtyards and along meandering paths, individuals found solace in quiet contemplation and spontaneous encounters. The rustle of leaves, the distant hum of conversation, and the soft clinking of porcelain in a busy tea room all converged to create an atmosphere of serene introspection. Amid these moments, the boundaries between the ordinary and the extraordinary blurred, allowing a sense of magic to infuse even the most mundane experiences. Artists captured these ephemeral instants with words and images, striving to preserve the fleeting beauty of a transient world. Each moment was a delicate balance of warmth and melancholy, a reflection of the intricate dance between human emotion and the ever-changing rhythms of nature. As twilight descended upon the horizon, the town and countryside alike embraced a quiet transformation. The fading light bathed buildings and fields in a gentle glow, evoking feelings of nostalgia and contemplation. In the cool of the evening, families gathered in communal spaces, sharing stories and dreams beneath a tapestry of stars. The atmosphere was imbued with a reflective quality, as if the world itself paused to honor the ephemeral beauty of each passing moment. In the soft murmur of nighttime conversations and the rhythmic chirping of crickets, one could sense an underlying promise of renewal. Every whispered word, every shared glance, and every silent smile carried the weight of memories and the hope of tomorrow. In this serene interlude between day and night, the essence of life was celebrated in its quiet, profound simplicity. 
In the final moments before the deep embrace of night, the world seemed to hold its breath in quiet anticipation. The enduring pulse of life, both in bustling streets and secluded fields, resonated with the promise of a new day. Each moment was a silent testament to the beauty of existence, a gentle reminder that every ending carried the seed of a fresh beginning. As stars emerged one by one, their light mingling with the lingering warmth of dusk, the eternal cycle of hope and renewal was unmistakably affirmed. Embracing the tranquil energy of the twilight, the soul found solace in the profound unity of nature and human aspiration, gently bridging the gap between yesterday and tomorrow."""