import io
import json
import random
import os
import asyncio
import transformers # For the tokenizer if generate_prompt needs it
from datasets import load_dataset
from tqdm import tqdm
from reindent import run as run_reindent # Assuming reindent.py is in your PYTHONPATH or same directory

# --- Start: Functions copied/adapted from APPS sample ---
def reindent_code(codestr):
    """Run the ``reindent`` tool over a source string and return its output.

    The input is wrapped in an in-memory text stream, handed to
    ``run_reindent`` together with a fixed option set, and the text written to
    the output stream is returned.

    Args:
        codestr: Source code text to pass through ``run_reindent``.

    Returns:
        The full contents of the output stream as a string.
    """
    reindent_options = {
        "dry-run": False,
        "help": False,
        "to": 10,
        "from": -1,
        "tabs": True,
        "encoding": "utf-8",
        "is-tabs": False,
        "tabsize": 10,
        "all-tabs": False,
    }
    source_stream = io.StringIO(codestr)
    output_stream = io.StringIO()
    run_reindent(source_stream, output_stream, config=reindent_options)
    return output_stream.getvalue()

def generate_prompt_from_apps(args_for_prompt, test_case_json, question_str, solutions_list, tokenizer, starter_code_str=None):
    """Build the APPS-style prompt string for a single problem.

    The prompt is ``"\\nQUESTION:\\n" + question``, followed by the optional
    starter code, a line naming the I/O format, and ``"\\nANSWER:\\n"``. When
    peeking is enabled, the leading tokens of one randomly chosen reference
    solution are appended after the answer marker.

    Args:
        args_for_prompt: Options container, either a dict or an object with
            attributes. ``peeking`` enables peeking when it is > 0.0;
            ``peek_frac`` is the fraction of the solution's tokens to reveal
            and falls back to ``peeking`` when missing or ``None``.
        test_case_json: Parsed ``input_output`` mapping. A truthy ``fn_name``
            entry selects the call-based format; otherwise standard input.
        question_str: Problem statement text.
        solutions_list: Parsed list of reference solutions (may be empty).
        tokenizer: Object providing ``encode`` and ``decode``. It is only
            touched when peeking actually happens.
        starter_code_str: Optional starter code appended after the question.

    Returns:
        A ``(prompt, sampled_solution)`` tuple. ``sampled_solution`` is the
        reference solution chosen for peeking, or ``None`` if none was used.
    """
    def _option(name):
        # The contract allows a dict as well as an attribute-style object;
        # ``hasattr`` alone never sees dict keys, which silently disabled
        # peeking for dict callers.
        if isinstance(args_for_prompt, dict):
            return args_for_prompt.get(name)
        return getattr(args_for_prompt, name, None)

    pieces = ["\nQUESTION:\n", question_str]
    if starter_code_str:
        pieces.append("\n" + starter_code_str)

    # test_case_json is already the parsed JSON for input_output.
    if test_case_json.get("fn_name"):
        pieces.append("\nUse Call-Based format")
    else:
        pieces.append("\nUse Standard Input format")
    pieces.append("\nANSWER:\n")

    sample_sol_for_peeking = None
    peeking = _option("peeking") or 0.0
    if peeking > 0.0 and solutions_list:
        sample_sol_for_peeking = random.choice(solutions_list)
        encoded_sol = tokenizer.encode(reindent_code(sample_sol_for_peeking), verbose=False)

        peek_frac = _option("peek_frac")
        if peek_frac is None:
            # No explicit fraction supplied: reuse the peeking value itself.
            peek_frac = peeking

        tokens_taken = int(peek_frac * len(encoded_sol))
        pieces.append(tokenizer.decode(encoded_sol[:tokens_taken]))

    return "".join(pieces), sample_sol_for_peeking
# --- End: Functions copied/adapted from APPS sample ---


# Placeholder LCDP class: a stand-in for the real framework; swap in the actual implementation.
class LCDP:
    """Stand-in for the LCDP framework: stores its settings and returns dummy code."""

    def __init__(self, api_key, model, max_workers, ignore_advice, use_pr_predictor):
        # Keep every constructor argument on the instance under the same name.
        (
            self.api_key,
            self.model,
            self.max_workers,
            self.ignore_advice,
            self.use_pr_predictor,
        ) = (api_key, model, max_workers, ignore_advice, use_pr_predictor)

    async def run(self, task_description, max_iterations, num_plans, num_tests,
                  num_codes, refine_rounds, use_pass_rate_for_train,
                  test_timeout, use_async_generation, best_only):
        """Simulate one framework run and return a placeholder result.

        Only ``task_description`` is used; the remaining parameters are
        accepted and ignored by this placeholder.

        Returns:
            A dict with the single key ``"code_id_0"`` whose value is a dict
            holding the placeholder source under ``"code"``. The source embeds
            the first 100 characters of ``task_description``.
        """
        # Yield to the event loop briefly to mimic asynchronous work.
        await asyncio.sleep(0.01)
        task_preview = task_description[:100]
        placeholder_code = f"# Placeholder code for task:\n{task_preview}...\nprint('Hello from LCDP!')"
        return {"code_id_0": {"code": placeholder_code}}

def _load_json_field(raw_value, default, field_name, problem_id):
    """Parse one JSON-encoded dataset field; return ``default`` if it is empty or malformed."""
    if not raw_value:
        return default
    try:
        return json.loads(raw_value)
    except json.JSONDecodeError as e:
        print(
            f"Warning: Could not parse '{field_name}' JSON for problem {problem_id}. "
            f"Continuing with an empty value. Error: {e}"
        )
        return default


def _write_results(path, results):
    """Write ``results`` to ``path`` as indented JSON, replacing any previous file."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)


async def evaluate_lcdp_with_apps_prompts(args):
    """Generate code for APPS problems with the LCDP framework and save it as JSON.

    For every problem in the selected split an APPS-style prompt is built, the
    LCDP framework is run on it, and the first returned code (or an error
    placeholder comment) is recorded. Results are written to
    ``<save_dir>/apps_<split>_lcdp_generated_codes.json`` every
    ``save_interval`` problems and once more at the end.

    Args:
        args: Namespace-like object. Attributes read here: ``split``,
            ``limit_problems``, ``peeking_fraction``, ``tokenizer_arch``,
            ``api_key``, ``lcdp_model``, ``max_workers``, ``ignore_advice``,
            ``use_pr_predictor``, ``save_dir``, ``save_interval``, ``debug``,
            ``max_iterations``, ``num_plans``, ``num_tests``, ``num_codes``,
            ``refine_rounds``, ``use_pass_rate_for_train``, ``test_timeout``
            and ``use_async_generation``.

    Returns:
        None. The results file is the only output besides console messages.
    """
    from types import SimpleNamespace

    # Log the configuration without leaking the API key into stdout/log files.
    printable_args = {
        key: ("<redacted>" if key == "api_key" and value else value)
        for key, value in vars(args).items()
    }
    print(f"Evaluation arguments: {printable_args}")

    problems = load_dataset("codeparrot/apps", split=f"{args.split}")
    if args.limit_problems:
        # Clamp the limit so a value larger than the split does not make select() fail.
        problems = problems.select(range(min(args.limit_problems, len(problems))))

    # The tokenizer is only consulted when peeking is enabled, so skip the
    # (potentially slow, network-bound) load otherwise.
    tokenizer = None
    if args.peeking_fraction > 0.0:
        try:
            tokenizer = transformers.AutoTokenizer.from_pretrained(args.tokenizer_arch)
        except Exception as e:
            print(f"Could not load tokenizer {args.tokenizer_arch}. Using gpt2 as fallback. Error: {e}")
            tokenizer = transformers.GPT2Tokenizer.from_pretrained("gpt2")

    lcdp_framework = LCDP(
        api_key=args.api_key,
        model=args.lcdp_model,
        max_workers=args.max_workers,
        ignore_advice=args.ignore_advice,
        use_pr_predictor=args.use_pr_predictor,
    )

    os.makedirs(args.save_dir, exist_ok=True)
    output_file_path = os.path.join(args.save_dir, f"apps_{args.split}_lcdp_generated_codes.json")

    # Attribute-style options consumed by generate_prompt_from_apps.
    prompt_args = SimpleNamespace(
        peeking=args.peeking_fraction,
        peek_frac=args.peeking_fraction,
    )

    generated_codes_for_apps = {}
    for index, problem_item in enumerate(tqdm(problems, desc="Processing APPS problems")):
        problem_id = problem_item.get("problem_id", f"problem_{index}")
        question = problem_item["question"]

        # Parse each field on its own so one malformed field does not discard
        # the other, valid one.
        input_output_json = _load_json_field(problem_item["input_output"], {}, "input_output", problem_id)
        solutions_list = _load_json_field(problem_item["solutions"], [], "solutions", problem_id)
        starter_code = problem_item.get("starter_code")

        task_description_for_lcdp, _ = generate_prompt_from_apps(
            args_for_prompt=prompt_args,
            test_case_json=input_output_json,
            question_str=question,
            solutions_list=solutions_list,
            tokenizer=tokenizer,
            starter_code_str=starter_code,
        )

        if args.debug:
            print(f"\n--- Problem ID: {problem_id} ---")
            print("Original APPS Prompt (Task Description for LCDP):")
            print(task_description_for_lcdp)
            print("------------------------------------")

        # Best effort per problem: a framework failure is recorded in the
        # output instead of aborting the whole evaluation run.
        try:
            best_codes_result = await lcdp_framework.run(
                task_description=task_description_for_lcdp,
                max_iterations=args.max_iterations,
                num_plans=args.num_plans,
                num_tests=args.num_tests,
                num_codes=args.num_codes,
                refine_rounds=args.refine_rounds,
                use_pass_rate_for_train=args.use_pass_rate_for_train,
                test_timeout=args.test_timeout,
                use_async_generation=args.use_async_generation,
                best_only=True,
            )

            if isinstance(best_codes_result, dict) and best_codes_result:
                first_code_id = next(iter(best_codes_result))
                generated_code_str = best_codes_result[first_code_id]['code']
            else:
                generated_code_str = "# LCDP Framework did not return a valid code."
                if args.debug:
                    print(f"Warning: No valid code returned for problem {problem_id}. Result: {best_codes_result}")

        except Exception as e:
            print(f"Error processing problem {problem_id} with LCDP framework: {e}")
            generated_code_str = f"# Error during LCDP execution: {e}"

        generated_codes_for_apps[str(problem_id)] = {
            "problem_id_original": problem_item.get("problem_id"),
            "question": question,
            "apps_prompt_used": task_description_for_lcdp,
            "generated_code_lcdp": generated_code_str,
            "reference_solutions": solutions_list,
            "input_output": input_output_json,
            "starter_code": starter_code,
        }

        # A non-positive interval disables intermediate saves instead of
        # raising ZeroDivisionError on the modulo.
        if args.save_interval and args.save_interval > 0 and (index + 1) % args.save_interval == 0:
            print(f"\nSaving intermediate results ({index+1} problems processed)...")
            _write_results(output_file_path, generated_codes_for_apps)

    # Final save
    _write_results(output_file_path, generated_codes_for_apps)
    print(f"\nFinished processing all problems. Results saved to {output_file_path}")


async def main_async_wrapper():
    """Parse the command-line arguments and run the APPS evaluation.

    Builds the argument parser, validates the parsed values, and awaits
    ``evaluate_lcdp_with_apps_prompts`` with the resulting namespace. Invalid
    arguments make argparse print an error and exit.
    """
    import argparse

    def parse_bool(value):
        # Strict boolean parsing: a typo such as "ture" is reported as an
        # error instead of being silently interpreted as False.
        token = str(value).strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(f"expected a boolean (true/false), got {value!r}")

    parser = argparse.ArgumentParser(description="Evaluate LCDP Multi-Agent Framework on APPS dataset using original APPS prompts.")

    # APPS dataset arguments
    parser.add_argument("--split", type=str, default="test", help="APPS dataset split to use (e.g., train, test).")
    parser.add_argument("--save_dir", type=str, default="./lcdp_apps_results_orig_prompt", help="Directory to save generated codes.")
    parser.add_argument("--limit_problems", type=int, default=None, help="Limit the number of problems to process (for testing).")
    parser.add_argument("--tokenizer_arch", type=str, default="gpt2", help="Tokenizer architecture for generate_prompt (e.g., gpt2, codet5-base).")
    parser.add_argument("--peeking_fraction", type=float, default=0.0, help="Fraction of solution to peek for prompt generation (0.0 for no peeking).")
    parser.add_argument("--save_interval", type=int, default=50, help="How often to save intermediate results (number of problems).")

    # LCDP framework arguments
    parser.add_argument("--api_key", type=str, required=True, help="API key for your LCDP framework.")
    parser.add_argument("--lcdp_model", type=str, default="gpt-3.5-turbo", help="Model to use within LCDP.")
    parser.add_argument("--max_iterations", type=int, default=2, help="Max iterations for LCDP.")
    parser.add_argument("--num_plans", type=int, default=3, help="Number of plans for LCDP.")
    parser.add_argument("--num_tests", type=int, default=3, help="Number of tests for LCDP (internal).")
    parser.add_argument("--num_codes", type=int, default=5, help="Number of codes to generate per plan for LCDP.")
    parser.add_argument("--refine_rounds", type=int, default=3, help="Refinement rounds for LCDP.")
    parser.add_argument("--test_timeout", type=int, default=10, help="Test timeout for LCDP.")
    parser.add_argument("--use_async_generation", action="store_true", help="Use async generation in LCDP.")
    # store_true so the option is off by default and passing the flag turns it
    # on; store_false made the default True and the flag disable the feature.
    parser.add_argument("--use_pass_rate_for_train", action="store_true", help="Use pass rate for train in LCDP (off unless this flag is given).")
    parser.add_argument("--max_workers", type=int, default=50)
    parser.add_argument("--ignore_advice", type=parse_bool, default=True, help="Boolean (true/false); default true.")
    parser.add_argument("--use_pr_predictor", type=parse_bool, default=False, help="Boolean (true/false); default false.")

    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug printing.")

    args = parser.parse_args()

    # The save interval is used as a modulus, so it must be at least 1.
    if args.save_interval < 1:
        parser.error("--save_interval must be a positive integer")

    await evaluate_lcdp_with_apps_prompts(args)

if __name__ == "__main__":
    # Script entry point: run the async CLI wrapper to completion.
    # reindent.py must be importable for the module-level import to succeed.
    # If it lives elsewhere, put its directory on PYTHONPATH or extend
    # sys.path before that import, e.g.:
    #   import sys
    #   sys.path.append('/path/to/directory_containing_reindent_py')
    asyncio.run(main_async_wrapper())