import numpy as np
import typing
import dataclasses as dc
import itertools
import random
from typing import Any, TypeVar, TypedDict, Sequence, Literal

from core.reasoning import CoT, Reasoner, RewardModel
from core.reasoning.reflection.self_verify import KwReflCumrewEvaluator
from .gen_puzzle_v2 import generate_sudoku_puzzle


# Solver input: the puzzle as a string. SudokuEvaluator._kwmap in this file
# asserts 81 characters and counts '0' characters as blanks.
type In = str
# Solver outcome: a string, where the literal "FAILURE" is the sentinel that
# generate_instances checks for to detect an unsolved puzzle.
type Out = str | Literal["FAILURE"]


@dc.dataclass
class SudokuCoT[Thought](CoT[In, Thought, Out]):
    """CoT record for one Sudoku puzzle, generic over the thought type.

    Extends ``CoT[In, Thought, Out]`` with a single extra field, ``correct``.
    """

    # Tri-state flag, None by default. Presumably None means the outcome has
    # not been checked yet -- TODO confirm against the code that sets it.
    correct: bool | None = None


# Type variable standing for the thought type of a solver.
_SudokuThought = TypeVar('_SudokuThought')
# Generic alias for a Reasoner specialised to the Sudoku In/Out types and left
# open in the thought type (presumably Reasoner's parameters are
# input / thought / output -- confirm against core.reasoning).
SudokuSolver = Reasoner[In, _SudokuThought, Out]


def generate_instances(
    sudoku_solver: SudokuSolver | None,
    blanks: int,  # number of blank cells in each generated puzzle
    n: int,  # number of samples to generate
    discard_failure: bool = True,
    verbose: int = 0,  # progress is printed when >= 1
) -> list[SudokuCoT]:
    """Generate ``n`` Sudoku CoT samples.

    Every iteration draws a fresh puzzle from ``generate_sudoku_puzzle``.
    With a solver, the puzzle is passed to it and the returned values are
    unpacked into the ``SudokuCoT`` constructor after the puzzle. Without a
    solver, the generator is asked for its own solution, which is passed as
    the third positional field with ``None`` as the second (presumably
    thoughts and outcome respectively -- confirm against ``CoT``).

    Args:
        sudoku_solver: Callable applied to each puzzle, or ``None`` to use the
            solution supplied by the puzzle generator.
        blanks: Number of blank cells requested for each puzzle.
        n: Number of samples to return.
        discard_failure: When true, samples whose outcome equals
            ``"FAILURE"`` are counted but not kept.
        verbose: Print a progress line and a final summary when ``>= 1``.

    Returns:
        The collected samples; the list is empty when ``n <= 0``.

    Note:
        With ``discard_failure=True`` the loop only ends once ``n``
        non-failing samples exist, so a solver that always fails never
        returns.
    """
    samples: list[SudokuCoT] = []
    failures = 0
    # Keep drawing puzzles until enough samples have been kept.
    while len(samples) < n:
        if sudoku_solver is None:
            puzzle, solution = generate_sudoku_puzzle(blanks, require_solution=True)
            sample = SudokuCoT(puzzle, None, solution)
        else:
            puzzle = generate_sudoku_puzzle(blanks)
            sample = SudokuCoT(puzzle, *sudoku_solver(puzzle))

        failed = sample.outcome == "FAILURE"
        if failed:
            failures += 1
        # Failures are always counted, but only kept when not discarding.
        if not (failed and discard_failure):
            samples.append(sample)

        if verbose >= 1:
            # '\r' rewrites the same terminal line on every iteration.
            print("Generated %d / %d CoTs for Sudoku puzzles with %d blanks, with %d failures"
                  % (len(samples), n, blanks, failures), end='\r')

    if verbose >= 1:
        print()  # move past the carriage-return progress line
        print(f"{len(samples)} CoTs have been generated for Sudoku puzzles with {blanks} blanks.")

    return samples


class SudokuEvaluator(KwReflCumrewEvaluator):
    """Evaluator that keys Sudoku references by their number of blanks."""

    def _kwmap(self, **references):
        """Return the keyword labels for one set of references.

        Reads ``references["input"]``, which must be an 81-character puzzle
        string, and counts its ``'0'`` characters as blanks.

        Returns:
            A 2-tuple: ``"<count> blanks"`` and the catch-all label ``"all"``.
        """
        puzzle: str = references["input"]
        assert len(puzzle) == 81
        blank_label = "%d blanks" % puzzle.count('0')
        return (blank_label, "all")
