import pandas

import base64
import hashlib

def derive_key(password: str, length: int) -> bytes:
    """Stretch the SHA-256 digest of ``password`` into a ``length``-byte key.

    The digest is repeated end to end as many whole times as fit into
    ``length`` and then topped up with a leading slice of the digest to
    cover the remainder.
    """
    digest = hashlib.sha256(password.encode()).digest()
    whole_repeats, leftover = divmod(length, len(digest))
    return digest * whole_repeats + digest[:leftover]

def decrypt(ciphertext_b64: str, password: str) -> str:
    """Recover the plaintext of a base64-encoded, XOR-encrypted string.

    The base64 payload is decoded to bytes, a key of the same length is
    derived from ``password`` via ``derive_key``, and each payload byte is
    XOR-ed with the key byte at the same position. The resulting bytes are
    decoded to ``str`` with the default codec.
    """
    payload = base64.b64decode(ciphertext_b64)
    keystream = derive_key(password, len(payload))
    plaintext = bytearray()
    for cipher_byte, key_byte in zip(payload, keystream):
        plaintext.append(cipher_byte ^ key_byte)
    return plaintext.decode()

# Reference example (disabled): load the CSV and decrypt the first row by hand.
#
# df = pandas.read_csv(
#     "https://openaipublic.blob.core.windows.net/simple-evals/browse_comp_test_set.csv"
# )
# examples = [row.to_dict() for _, row in df.iterrows()]
#
# row = examples[0]
#
# problem = decrypt(row.get("problem", ""), row.get("canary", ""))
# answer = decrypt(row.get("answer", ""), row.get("canary", ""))
#
# print(problem)
# print(answer)
# print(row['problem_topic'])

class BrowseCompDataset:
    """Row-oriented view of the BrowseComp test set with on-demand decryption.

    The CSV is read once in ``__init__`` and every row is kept as a plain
    dict in ``self.examples``. The ``problem`` and ``answer`` fields are
    stored as loaded and are decrypted, using the row's ``canary`` value as
    the password, only when an example is yielded by ``get_full_set`` or
    ``get_by_topic``.
    """

    # Default location of the dataset CSV.
    DEFAULT_URL = (
        "https://openaipublic.blob.core.windows.net/simple-evals/browse_comp_test_set.csv"
    )

    def __init__(self, url: str = DEFAULT_URL):
        """Load the dataset rows into memory.

        Args:
            url: Location of the CSV, passed straight to ``pandas.read_csv``.
                Defaults to ``DEFAULT_URL``.
        """
        df = pandas.read_csv(url)
        self.examples = [row.to_dict() for _, row in df.iterrows()]

    def _decrypt_example(self, example: dict) -> dict:
        """Return a shallow copy of ``example`` with problem and answer decrypted.

        The input dict is left unmodified. Missing ``problem``, ``answer`` or
        ``canary`` keys are treated as empty strings.
        """
        decrypted_example = example.copy()
        canary = example.get("canary", "")
        decrypted_example['problem'] = decrypt(example.get("problem", ""), canary)
        decrypted_example['answer'] = decrypt(example.get("answer", ""), canary)
        return decrypted_example

    def get_available_topics(self) -> list:
        """Return the distinct, non-empty ``problem_topic`` values, sorted."""
        topics = set()
        for example in self.examples:
            topic = example.get('problem_topic')
            # pandas loads empty CSV cells as float NaN, which is truthy.
            # Letting it into the set would make sorted() raise TypeError
            # when it compares the float against the string topics.
            if topic and not pandas.isna(topic):
                topics.add(topic)
        return sorted(topics)

    def get_dataset_size(self) -> int:
        """Return the total number of examples in the dataset."""
        return len(self.examples)

    def get_full_set(self):
        """Yield every example with its problem and answer decrypted."""
        for example in self.examples:
            yield self._decrypt_example(example)

    def get_by_topic(self, topic: str):
        """Yield decrypted examples whose ``problem_topic`` equals ``topic``."""
        for example in self.examples:
            if example.get('problem_topic') == topic:
                yield self._decrypt_example(example)