{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from autogen import GroupChat,GroupChatManager\n",
    "# from customegroupchat import customised_groupchat, customised_groupchatmanager\n",
    "from agents import DEA, MLA, IA, BOA, CDA, KIA, ERA, DJE, user_proxy\n",
    "from LLM_config import llm_config\n",
    "from utils import generate_prompt, validate_json_output, StateTracker, validate_phase_transition, remove_thinking_output\n",
    "\n",
    "\n",
    "# Shared state tracker; its track_proposals method is hooked into DEA below.\n",
    "tracker = StateTracker()\n",
    "\n",
    "# Register hooks on the \"process_message_before_send\" hookable method.\n",
    "# DEA registration order: remove_thinking_output, tracker.track_proposals,\n",
    "# validate_phase_transition. Presumably hooks are applied in the order they are\n",
    "# registered - TODO confirm before reordering.\n",
    "# (A validate_delegator_message hook on DEA is currently disabled.)\n",
    "DEA.register_hook(hookable_method=\"process_message_before_send\", hook=remove_thinking_output)\n",
    "DEA.register_hook(hookable_method=\"process_message_before_send\", hook=tracker.track_proposals)\n",
    "DEA.register_hook(hookable_method=\"process_message_before_send\", hook=validate_phase_transition)\n",
    "DJE.register_hook(hookable_method=\"process_message_before_send\", hook=validate_json_output)\n",
    "\n",
    "# Position in the worker rotation; read and advanced by custom_speaker_selection_func.\n",
    "worker_counter = 0\n",
    "# turn_counter = -1\n",
    "def custom_speaker_selection_func(last_speaker, groupchat: GroupChat):\n",
    "    \"\"\"Choose the next speaker using a fixed, deterministic schedule.\n",
    "\n",
    "    Schedule, as implemented below:\n",
    "      * after ``user_proxy`` -> ``CDA`` (and the worker rotation restarts)\n",
    "      * after ``CDA`` -> the next worker in the rotation BOA, DEA, MLA, IA\n",
    "      * after a worker -> ``KIA`` once the rotation has completed a full lap,\n",
    "        otherwise back to ``CDA``\n",
    "      * after ``KIA`` -> ``ERA``; after ``ERA`` -> ``CDA``\n",
    "\n",
    "    Args:\n",
    "        last_speaker: The agent that produced the most recent message.\n",
    "        groupchat: The active ``GroupChat``; part of the speaker-selection\n",
    "            callback signature but not read here.\n",
    "\n",
    "    Returns:\n",
    "        The agent that should speak next, or ``None`` when ``last_speaker`` is\n",
    "        not part of the schedule above.\n",
    "\n",
    "    NOTE(review): ``DJE`` is a member of the group chat but is never returned\n",
    "    by this function, so it never gets a turn - confirm this is intended.\n",
    "    \"\"\"\n",
    "    global worker_counter\n",
    "    workers = [BOA, DEA, MLA, IA]\n",
    "\n",
    "    if last_speaker is user_proxy:\n",
    "        # This function never hands the turn to user_proxy, so presumably it is\n",
    "        # only the last speaker when it opens a chat. Restart the rotation here\n",
    "        # so a second initiate_chat (or a re-run after a chat that ended early)\n",
    "        # does not resume mid-lap from leftover kernel state.\n",
    "        worker_counter = 0\n",
    "        return CDA\n",
    "\n",
    "    if last_speaker is CDA:\n",
    "        # Hand the turn to the next worker and advance the rotation.\n",
    "        next_worker = workers[worker_counter % len(workers)]\n",
    "        worker_counter += 1\n",
    "        return next_worker\n",
    "\n",
    "    if last_speaker in workers:\n",
    "        # The counter was already advanced when this worker was picked, so a\n",
    "        # multiple of len(workers) means every worker has spoken this lap.\n",
    "        return KIA if worker_counter % len(workers) == 0 else CDA\n",
    "\n",
    "    if last_speaker is KIA:\n",
    "        return ERA\n",
    "\n",
    "    if last_speaker is ERA:\n",
    "        return CDA\n",
    "\n",
    "    # Unscheduled speaker: return None explicitly (previously an implicit\n",
    "    # fall-through). Presumably the group chat treats None as 'stop' - confirm.\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group chat wiring. The next speaker is chosen by custom_speaker_selection_func.\n",
    "# NOTE(review): the select_speaker_* templates are presumably only consulted by\n",
    "# LLM-driven speaker selection - confirm whether they are still needed here.\n",
    "# Earlier experiments (a lambda forcing proposal order, a phase_lock mapping and a\n",
    "# termination_condition on PIPELINE_OVERVIEW.json) were commented out and are not active.\n",
    "group_chat = GroupChat(\n",
    "    [CDA, DEA, MLA, IA, BOA, KIA, ERA, DJE, user_proxy],\n",
    "    messages=[],\n",
    "    speaker_selection_method=custom_speaker_selection_func,\n",
    "    allow_repeat_speaker=False,\n",
    "    max_round=80,\n",
    "    select_speaker_message_template=generate_prompt(\n",
    "        \"prompts/select_speaker_message_template.prompt\"\n",
    "    ),\n",
    "    select_speaker_prompt_template=generate_prompt(\n",
    "        \"prompts/select_speaker_prompt_template.prompt\"\n",
    "    ),\n",
    "    select_speaker_auto_multiple_template=generate_prompt(\n",
    "        \"prompts/select_speaker_auto_multiple_template.prompt\"\n",
    "    ),\n",
    "    select_speaker_auto_none_template=generate_prompt(\n",
    "        \"prompts/select_speaker_auto_none_template.prompt\"\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wrap the group chat in its manager; this is the object user_proxy talks to below.\n",
    "chat_manager = GroupChatManager(\n",
    "    groupchat=group_chat,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "generated_request = \"\"\"This discussion session is set up to discuss the best data pipeline for a real time data intensive machine learning training and inference self driving application. The goal is to discuss and find consensus on how to set up the data pipeline, including each component in the datapipeline. \n",
    "You can assume that we have access to aws. \n",
    "\n",
    "**Data Description:**\n",
    "Real-time data of cars driving in street. \n",
    "There are 6 camera sources with data in .jpg format; 1 lidar source in .pcd.bin format; and 5 radar sources with data in .pcd format. \n",
    "\n",
    "**Discussion and Design:**\n",
    "- Emphasise comprehensive understanding of the data sources, processing requirements, and desired outcomes.\n",
    "- Encourage each other to engage in an open discussion on potential technologies, components, and architectures that can handle the diverse data streams and real-time nature of the data.\n",
    "- Keep the conversation on design and evaluating the pros and cons of different design choices, considering scalability, maintainability, and cost-effectiveness.\n",
    "- The team should agree on a final architectural design, justifying the choices made.\n",
    "- The team should produce the required document PIPELINE_OVERVIEW.json.\n",
    "\n",
    "**Final Output:**\n",
    "- Produce a concise summary of the agreed-upon pipeline architecture, highlighting its key components and connections.\n",
    "- Provide a high-level plan and rationale for the design, explaining why it is well-suited for the given data and use case.\n",
    "- Estimate the cloud resources, implementation efforts, and associated costs, providing a rough breakdown and complexity rating.\n",
    "- Generate a `PIPELINE_OVERVIEW.json` file, detailing the proposed complete architecture in JSON format with the following fields: \n",
    " - “Platform“: A cloud service provider’s name if the cloud solution is the best, or “local server” if locally hosted servers are preferred. \n",
    " - “Component 1”: The first component in the pipeline framework. \n",
    " - “Component 2”: The second component in the pipeline framework. Continue until all required components are listed. \n",
    " - “Implementation difficulties”: A rating from 1 to 10 (lowest to highest). \n",
    " - “Maintainess difficulties”: A rating from 1 to 10 (lowest to highest). \n",
    "\n",
    "**Instructions:**\n",
    "- Remember, this is a collaborative design discussion, not a project execution. Refrain from assigning tasks with deadlines.\n",
    "- Keep the conversation focused on architectural choices, technologies, and potential challenges.\n",
    "- Emphasize the importance of a well-thought-out design.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start the group discussion with the generated_request prompt.\n",
    "groupchat_result = user_proxy.initiate_chat(chat_manager, message=generated_request)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "request = \"\"\"This is a discussion thread. \n",
    "** DO NOT attempt to set up any component or environment, DO NOT attempt to write code for any component. DO NOT implement any designs. **\n",
    "Discuss the requirements and possible technologies needed for the design of a scalable and practical data pipeline architecture for a real-time data-intensive application, where all input data and files are saved upon arrival, can be processed to a suitable format, and can be used in downstream machine learning tasks. \n",
    "Data description: Real-time data of cars driving in street. There are 6 camera sources with data in .jpg format; 1 lidar source in .pcd.bin format; and 5 radar sources with data in .pcd format. \n",
    "Note that you can access AWS cloud service providers. \n",
    "** DO NOT attempt to set up any component or environment, DO NOT attempt to write code for any component. DO NOT implement any designs. **\n",
    "\n",
    "There should be data ingestion, storage, extraction, cleaning, transformation, reshaping, exporting, visualising, monitoring, conduct machine learning experiments, and future inference from the data ingested.\n",
    "\n",
    "This step is focused on the architectural design, meaning choosing the components and deciding on the connections among components. DO NOT PRODUCE ANY CODE or IMPLEMENTATION. \n",
    "\n",
    "Ensure the architecture uses up-to-date technologies, is scalable, and can be easily modified and updated in the future. \n",
    "Ensure the effectiveness and efficiency and stability of the architecture. \n",
    "** DO NOT attempt to set up any component or environment, DO NOT attempt to write code for any component. DO NOT implement any designs. **\n",
    "\n",
    "Discuss the possible solutions among yourselves. Discuss the pros and cons of each component proposed. \n",
    "\n",
    "After you agree to the solutions and components that should be used, generate a final response together. \n",
    "Ensure the final response includes paragraphs and file in the following format: \n",
    "1.  A few paragraphs briefly discuss your intuitions and understanding of the data provided, with the following details:\n",
    " - Detail your high-level plan, necessary design choices and ideal structural pipeline proposal. \n",
    " - Justify how the design is better suited for the provided data and data description. \n",
    " - Estimate the cloud compute and storage requirement, implementation requirement and difficulties, and cost in dollars associated with the structure. \n",
    "2) <PIPELINE_OVERVIEW.json>: provide the new idea in JSON format with the following fields: \n",
    " - “Platform“: A cloud service provider’s name if the cloud solution is the best, or “local server” if locally hosted servers are preferred. \n",
    " - “Component 1”: The first component in the pipeline framework. \n",
    " - “Component 2”: The second component in the pipeline framework. Continue until all required components are listed. \n",
    " - “Implementation difficulties”: A rating from 1 to 10 (lowest to highest). \n",
    " - “Maintainess difficulties”: A rating from 1 to 10 (lowest to highest). \n",
    "\n",
    "DO NOT attempt to set up any component, DO NOT attempt to write code for any component.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start a group discussion with the hand-written request prompt.\n",
    "# NOTE(review): this rebinds groupchat_result, replacing the result of the chat\n",
    "# started from generated_request - confirm the earlier result is not needed.\n",
    "groupchat_result = user_proxy.initiate_chat(chat_manager, message=request)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
