from openai import AzureOpenAI
from agents.agent import Agent
import json
import os
import sys

import numpy as np
# import torch
# import torchvision.transforms as T
from PIL import Image
# from torchvision.transforms.functional import InterpolationMode
# from transformers import AutoModel, AutoTokenizer


class HumanAgent(Agent):
    """Agent whose replies are typed by a person on standard input.

    Instead of querying a model, ``respond`` prints the conversation
    history string and reads one line from the terminal.
    """

    def __init__(self, role="SOLVER") -> None:
        """Create the agent with an empty conversation.

        Args:
            role: Role name forwarded to the ``Agent`` base class.

        Raises:
            OSError: If ``config/keys.json`` cannot be opened.
            ValueError: If ``config/keys.json`` does not contain valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        super().__init__(role=role)
        # List of strings. Each element (string) is a message alternating
        # between solver and expert.
        self.conversation = []
        # The parsed keys are not used by this class. The file is still read
        # so that a missing or malformed config fails at construction time,
        # as it always has; the context manager guarantees the handle is
        # closed instead of being left to the garbage collector.
        with open(os.path.join("config", "keys.json")) as config_file:
            json.load(config_file)

    def clear(self):
        """Discard all stored messages by resetting ``self.conversation``."""
        self.conversation = []

    def respond(self, image_data, actions, message=None):
        """Show the conversation so far and return the line the human types.

        Args:
            image_data: Forwarded unchanged to
                ``get_conversation_history_string``.
            actions: Forwarded unchanged to
                ``get_conversation_history_string``.
            message: Optional message, forwarded unchanged to
                ``get_conversation_history_string``.

        Returns:
            The line read from standard input (without the trailing newline).
            The same string is appended to ``self.conversation``.

        Raises:
            EOFError: If standard input is exhausted before a line is read.
        """
        # Not defined in this class; expected to be provided by the Agent
        # base class.
        llm_input = self.get_conversation_history_string(
            image_data=image_data, actions=actions, message=message,
            model="human")

        # The prompt is written and flushed explicitly rather than passed to
        # input() -- presumably so it is reliably visible on stdout before
        # the call blocks (NOTE(review): confirm the original motivation).
        sys.stdout.write(f"{llm_input}\nContinue the conversation: ")
        sys.stdout.flush()
        response = input()

        self.conversation.append(response)
        return response
