"""
Finds large independent set in graph G where nodes are binary strings of length n.
Nodes in G are connected if they share a subsequence of length at least n-s. 

Improve the priority_v5 function over its previous versions. 
Return only the body of the function as valid Python code, without the function header, markdown, formatting, comments, or explanations. 
Do not include any code block markers such as ```python or ```. 
Keep the code short.
"""
import itertools
import hashlib
import numpy as np
import networkx as nx
import lmdb
import json
import os


def load_graph(graph_db_path):
    """Load an undirected graph from an LMDB database.

    Every record is read as ``key -> JSON list``: the key is decoded to a
    node name and the value is parsed as the list of that node's neighbors.

    Args:
        graph_db_path: Path handed to ``lmdb.open`` (opened read-only,
            without locking).

    Returns:
        A ``networkx.Graph`` holding every key as a node and one edge for
        each (key, neighbor) pair.
    """
    G = nx.Graph()
    graph_env = lmdb.open(graph_db_path, readonly=True, lock=False)

    try:
        with graph_env.begin() as txn:
            for key, value in txn.cursor():
                node = key.decode()
                neighbors = json.loads(value.decode())
                # Register the node itself so that a key whose neighbor list
                # is empty is not silently left out of the graph.
                G.add_node(node)
                G.add_edges_from((node, neighbor) for neighbor in neighbors)
    finally:
        # Release the environment even when decoding or parsing fails midway.
        graph_env.close()

    return G


def hash_priority_mapping(priorities, sequences):
    """Return a SHA-256 hex digest fingerprinting the sequence -> score mapping.

    The listed sequences are paired with their score, ordered by sequence,
    rendered as ``seq:score`` items joined by commas, and hashed.

    Args:
        priorities: Mapping from sequence to its priority score.
        sequences: Sequences to include; each must be a key of ``priorities``.

    Returns:
        Hexadecimal SHA-256 digest of the rendered mapping.
    """
    scored = []
    for seq in sequences:
        scored.append((seq, priorities[seq]))

    # Order by sequence only, so the digest does not depend on input order.
    scored.sort(key=lambda pair: pair[0])

    rendered = ','.join(f'{seq}:{score}' for seq, score in scored)
    digest = hashlib.sha256()
    digest.update(rendered.encode())
    return digest.hexdigest()


def evaluate(params, graph_dir):
    """Solve one ``(n, s)`` instance and report its size, hash and VT overlap.

    Args:
        params: Pair ``(n, s)`` forwarded to ``solve``.
        graph_dir: Directory forwarded to ``solve`` for locating the graph.

    Returns:
        Tuple ``(size, hash_value, vt_overlap)``: the number of nodes in the
        independent set, the hash returned by ``solve``, and the overlap
        percentage returned by ``compute_overlap``.
    """
    n, s = params

    # The VT solutions file lives under $VT_DIR (with a fallback default).
    vt_root = os.getenv("VT_DIR", "/default/path/to/vt")
    codebook_path = os.path.join(
        vt_root,
        "fundcc",
        "specifications",
        "gpt",
        "challenge_vtcodes",
        "vt_solutions.json",
    )
    vt_codebook = load_vt_codebook(codebook_path)

    chosen, digest = solve(n, s, graph_dir)
    overlap_pct = compute_overlap(chosen, vt_codebook, n)

    return len(chosen), digest, overlap_pct


def solve(n, s, graph_dir):
    """Greedily build an independent set in the graph stored for ``(n, s)``.

    The graph is loaded from ``graph_s{s}_n{n}.lmdb`` inside ``graph_dir``.
    Every node is scored with ``priority``; nodes are then visited from the
    highest score to the lowest (ties broken by ascending node name). Each
    node still present in the graph is added to the result, after which the
    node and its remaining neighbors are removed.

    Args:
        n: Length of the binary strings; part of the graph file name.
        s: Second graph parameter; part of the graph file name.
        graph_dir: Directory containing the LMDB graph databases.

    Returns:
        Tuple ``(independent_set, hash_value)``. ``hash_value`` is the result
        of ``hash_priority_mapping`` over all binary strings of length ``n``
        when ``n == 9`` and ``None`` otherwise.
    """
    path = os.path.join(graph_dir, f"graph_s{s}_n{n}.lmdb")
    G = load_graph(path)

    # priority() receives its own copy of the graph, so anything it does to
    # that argument cannot disturb the greedy pass on G below.
    G_for_priority = G.copy()
    priorities = {node: priority(node, G_for_priority, n, s) for node in G.nodes}

    # Sort nodes first by priority (higher is better), then by lexicographic order (ascending)
    nodes_sorted = sorted(G.nodes, key=lambda x: (-priorities[x], x))

    independent_set = set()
    for node in nodes_sorted:
        if node not in G:
            # Already removed as the neighbor of an earlier pick.
            continue
        independent_set.add(node)
        # The list is fully built before removal starts, so the neighbor
        # view is not mutated while being iterated.
        G.remove_nodes_from([node, *G.neighbors(node)])

    hash_value = None
    if n == 9:
        # Enumerate the 2**n strings only when the hash is actually needed.
        sequences = [''.join(seq) for seq in itertools.product('01', repeat=n)]
        hash_value = hash_priority_mapping(priorities, sequences)

    return independent_set, hash_value


def load_vt_codebook(vt_file_path):
    """Load the VT codebook from a JSON file.

    Args:
        vt_file_path: Path of a JSON file whose top level is an object with
            integer-like string keys.

    Returns:
        A dict with the same values as the JSON object, keyed by ``int``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON or a key is not an
            integer literal.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(vt_file_path, "r", encoding="utf-8") as f:
        vt_solutions = json.load(f)

    # JSON object keys are always strings; convert them to integers so the
    # codebook can be indexed directly by n.
    return {int(k): v for k, v in vt_solutions.items()}

def compute_overlap(independent_set, vt_codebook, n):
    """Return the percentage of the VT codebook for ``n`` found in ``independent_set``.

    Args:
        independent_set: Iterable of sequences selected by the solver.
        vt_codebook: Mapping from ``n`` to the collection of VT codewords
            for that ``n``.
        n: Key to look up in ``vt_codebook``.

    Returns:
        ``100 * overlap / len(VT set)`` rounded to two decimals, or ``0.0``
        when ``vt_codebook`` has no entry for ``n`` or the entry is empty.
    """
    if n not in vt_codebook:
        return 0.0  # No VT codebook available for this n

    vt_set = set(vt_codebook[n])
    if not vt_set:
        # An empty codebook would otherwise cause a division by zero below.
        return 0.0

    overlap_count = sum(1 for seq in independent_set if seq in vt_set)
    return round((overlap_count / len(vt_set)) * 100, 2)


def priority(node, G, n, s):
    """Score `node` for inclusion in the independent set (higher is taken first).

    This baseline ignores all of its arguments and gives every node the same
    score.
    """
    baseline_score = 0.0
    return baseline_score
