# models/value_mapper.py
"""
Multi-core action mapping / masked-greedy mapper.

Given per-task Q-scores (for single-core action selection), this module
provides utility to map tasks to K cores at a single time step using an
iterative masked-greedy strategy:
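  - cores are filled in index order (core 0 first); at each iteration the
    current core takes the highest-scoring task that is still unassigned
    (scores are per task, so every core ranks the tasks identically)
  - each core takes at most one task and each task is assigned to at most
    one core; cores left over once the tasks run out stay idle
  - supports a mask for tasks that are not ready/available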

API:
  mapper = MaskedGreedyMapper(n_tasks, n_cores)
  assignments = mapper.map(q_scores, task_mask)  # returns list of (core -> task_id or -1)
Notes:
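  - q_scores: torch.Tensor or array-like of shape [n_tasks] (higher is better)
  - task_mask: optional boolean list/array of length n_tasks (True = available);
    None means every task is available
  - returns a list of length n_cores; entry c is the task id assigned to
    core c, or -1 (IDLE_TASK) if that core stays idle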
"""
from typing import List, Optional
import torch
import numpy as np

IDLE_TASK = -1  # sentinel task id meaning "this core stays idle"


class MaskedGreedyMapper:
    """Assign the highest-scoring available tasks to cores, one task per core.

    Scores are per task (not per task/core pair), so the masked-greedy
    procedure "give each core in turn the best task still unassigned" is
    equivalent to ranking the available tasks once and handing the top
    ``n_cores`` of them to cores ``0, 1, 2, ...`` in that order.
    """

    def __init__(self, n_tasks: int, n_cores: int):
        """Store the problem size.

        n_tasks: number of task slots scored at each time step.
        n_cores: number of cores to fill at each time step.
        """
        self.n_tasks = n_tasks
        self.n_cores = n_cores

    @staticmethod
    def _as_vector(values, dtype: type, name: str) -> np.ndarray:
        """Convert *values* to a 1-D array of *dtype*, dropping singleton axes."""
        # squeeze() lets a batch-of-one such as [1, n_tasks] through;
        # atleast_1d() restores the axis when everything was squeezed away.
        vector = np.atleast_1d(np.squeeze(np.asarray(values, dtype=dtype)))
        if vector.ndim != 1:
            raise ValueError(
                f"{name} must be one-dimensional, got shape {vector.shape}"
            )
        return vector

    def map(self, q_scores: torch.Tensor, task_mask: Optional[List[bool]] = None) -> List[int]:
        """Map tasks to cores for a single time step.

        q_scores: per-task scores, higher is better. A torch tensor (detached
            and moved to CPU here), a numpy array or a plain sequence. Expected
            shape is [n_tasks]; singleton axes (e.g. [1, n_tasks]) are dropped.
            Entries beyond n_tasks are ignored.
        task_mask: optional boolean mask of length n_tasks (True = available).
            None means every task is available. A scalar or length-1 mask is
            broadcast to all tasks.

        Returns a list of length n_cores whose entry c is the task id assigned
        to core c, or IDLE_TASK (-1) when no available task is left for it.

        Ties are broken in favour of the lowest task id. NaN scores rank below
        every real score, so such tasks are only used once all others are
        taken. No score threshold is applied: an available task is assigned
        even when its score is negative.

        Raises ValueError if q_scores or task_mask is not one-dimensional or
        the mask does not match n_tasks, and IndexError if an available task
        has no entry in q_scores.
        """
        if isinstance(q_scores, torch.Tensor):
            # Cast inside torch first: dtypes NumPy lacks (e.g. bfloat16)
            # cannot be exported with .numpy() directly.
            q_scores = q_scores.detach().cpu().to(torch.float64).numpy()
        scores = self._as_vector(q_scores, float, "q_scores")

        if task_mask is None:
            available = np.ones(self.n_tasks, dtype=bool)
        else:
            mask = self._as_vector(task_mask, bool, "task_mask")
            try:
                available = np.broadcast_to(mask, (self.n_tasks,))
            except ValueError as err:
                raise ValueError(
                    f"task_mask has shape {mask.shape}, expected ({self.n_tasks},)"
                ) from err

        n_cores = max(self.n_cores, 0)
        assignments = [IDLE_TASK] * n_cores
        if n_cores == 0:
            return assignments

        candidates = np.flatnonzero(available)  # ascending task ids
        if candidates.size and candidates[-1] >= scores.size:
            raise IndexError(
                f"q_scores has {scores.size} entries but task "
                f"{int(candidates[-1])} is marked available"
            )

        # One stable descending sort replaces the per-core argmax scan: equal
        # scores keep ascending task-id order and NaNs are sorted to the end.
        order = np.argsort(-scores[candidates], kind="stable")
        for core, task in enumerate(candidates[order][:n_cores]):
            assignments[core] = int(task)
        return assignments
