"""
Scatter chart curriculum generator
"""

import os
import json
import copy
from typing import List, Dict
from metachart import MetaGenerator


class ScatterChartGenerator(MetaGenerator):
    def __init__(self, args, chart_id):
        """
        Set up a generator instance for one chart.

        Args:
            args: Run options. The whole object is forwarded to
                ``MetaGenerator.__init__``; ``chart_type``,
                ``detailed_reasoning`` and ``force_ground`` are also copied
                onto the instance here.
            chart_id: Identifier of the chart, stored as ``self.chart_id``.
        """
        super().__init__(args)

        # Which chart this generator is working on.
        self.chart_type, self.chart_id = args.chart_type, chart_id

        # Generation state: QA list starts empty, round_num is fixed at 2,
        # qa_idx starts at 0.
        self.all_qa_data_list, self.round_num, self.qa_idx = [], 2, 0

        # Switches read by the QA builders: whether to spell out worked details
        # in reasoning steps, and whether intermediate steps keep their masks.
        self.detailed_reasoning, self.force_ground = args.detailed_reasoning, args.force_ground
    


    ############################################################
    #   One-step Operator: h(list[Bar] | list[v]) → list[v]
    ############################################################
    
    def _one_step_statistics(self, chart_metadata: Dict):
        """
        Build the easy QA pool: whole-chart sum, mean and median for each axis,
        plus a count of the plotted points.

        Args:
            chart_metadata: Chart description. Keys read here are ``x_label``,
                ``y_label``, ``scatter_category`` (``singular`` / ``plural``),
                ``scatter_x_data``, ``scatter_y_data`` and ``scatter_labels``.

        Returns:
            Dict keyed by ``one_step__statistics__*`` task names. Each entry
            holds ``question`` (paraphrases), ``reasoning`` (step texts),
            ``constraint`` (always ``"none"``), ``answer`` and ``mask`` (point
            indices attached to every reasoning step and to the answer).
        """
        x_title = chart_metadata['x_label']
        y_title = chart_metadata['y_label']
        singular = chart_metadata['scatter_category']['singular']
        plural = chart_metadata['scatter_category']['plural']

        x_indices = list(range(len(chart_metadata["scatter_x_data"])))
        y_indices = list(range(len(chart_metadata["scatter_y_data"])))
        x_values = copy.deepcopy(chart_metadata["scatter_x_data"])
        y_values = copy.deepcopy(chart_metadata["scatter_y_data"])
        labels = copy.deepcopy(chart_metadata["scatter_labels"])

        # ---- text helpers -------------------------------------------------
        def bullet_lines(values):
            # One "* label: value" line per point, in chart order.
            return '\n'.join([f"* {labels[pos]}: {value}" for pos, value in enumerate(values)]).strip()

        def plus_chain(values):
            # "v1+v2+...+vn", the left-hand side of the worked arithmetic.
            return '+'.join(map(str, values))

        def median_walkthrough(title, values, median_value):
            # Odd length: a single middle value. Even length: show the two
            # middle values that get averaged.
            ordered = sorted(values)
            if len(values) % 2 == 1:
                return f"* Sort {title} values: {ordered}\n* The median is the middle value: {median_value}"
            half = len(ordered) // 2
            low_mid, high_mid = ordered[half - 1], ordered[half]
            return f"* Sort {title} values: {ordered}\n* The median is the average of the two middle values: ({low_mid} + {high_mid})/2 = {median_value}"

        def with_detail(sentence, detail):
            # The worked detail is appended only in detailed-reasoning mode;
            # otherwise the sentence is simply closed with a period.
            return sentence + (f":\n{detail}" if self.detailed_reasoning else ".")

        def grounded(indices):
            # Intermediate steps keep their mask only when grounding is forced.
            return indices if self.force_ground else []

        # ---- numbers and worked reasoning -----------------------------------
        x_readout = bullet_lines(x_values)
        y_readout = bullet_lines(y_values)

        x_total = self._compute_data_sum(x_values)
        x_total_work = f"{plus_chain(x_values)} = {x_total}"
        y_total = self._compute_data_sum(y_values)
        y_total_work = f"{plus_chain(y_values)} = {y_total}"

        # NOTE(review): the mean is the raw quotient although the questions ask
        # for two decimal places; presumably rounding happens downstream — confirm.
        x_count = len(x_indices)
        x_mean = x_total / x_count
        x_mean_work = f"{plus_chain(x_values)}/{x_count} = {x_total}/{x_count} = {x_mean}"
        y_count = len(y_indices)
        y_mean = y_total / y_count
        y_mean_work = f"{plus_chain(y_values)}/{y_count} = {y_total}/{y_count} = {y_mean}"

        x_median, x_median_indices = self._compute_data_median(x_values)
        y_median, y_median_indices = self._compute_data_median(y_values)
        x_median_work = median_walkthrough(x_title, x_values, x_median)
        y_median_work = median_walkthrough(y_title, y_values, y_median)

        # ---- entry builders (the x and y tasks share their wording) ----------
        def sum_task(title, readout, arithmetic, answer, indices):
            return {
                "question": [
                    f"What is the total {title} for all the {plural} in this chart?",
                    f"For all the {plural} in this chart, what is the sum of their {title}?",
                    f"Can you help calculate the sum of {title} for all the {plural} in this chart?",
                    f"Please compute the sum of {title} for all the {plural} in this chart.",
                ],
                "reasoning": [
                    {
                        "step_1": with_detail(f"First, I need to read the {title} of each {singular} in this chart", readout),
                        "step_2": with_detail(f"Second, I need to sum them up to calculate the total {title} of all the {plural}", arithmetic),
                    },
                ],
                "constraint": "none",
                "answer": answer,
                "mask": {
                    "step_1": indices,
                    "step_2": grounded(indices),
                    "answer": indices,
                },
            }

        def mean_task(title, readout, arithmetic, answer, indices):
            # Same four phrasings, once with "mean" and once with "average".
            questions = []
            for word in ("mean", "average"):
                questions += [
                    f"What is the {word} {title} of all the {plural} in this chart? Please round to two decimal places.",
                    f"For all the {plural} in this chart, what is their {word} {title}? Please round to two decimal places.",
                    f"Can you help calculate the {word} {title} of all the {plural} in this chart? Please round to two decimal places.",
                    f"Please compute the {word} {title} of all the {plural} in this chart. Please round to two decimal places.",
                ]
            return {
                "question": questions,
                "reasoning": [
                    {
                        "step_1": with_detail(f"First, I need to read the {title} of each {singular} in this chart", readout),
                        "step_2": with_detail(f"Second, I need to calculate the mean {title} of all the {plural}", arithmetic),
                    },
                ],
                "constraint": "none",
                "answer": answer,
                "mask": {
                    "step_1": indices,
                    "step_2": grounded(indices),
                    "answer": indices,
                },
            }

        def median_task(title, readout, walkthrough, answer, indices, middle_indices):
            return {
                "question": [
                    f"What is the median value of {title} among all the {plural} in this chart? Please round to two decimal places.",
                    f"For all the {plural} in this chart, what is the median value of their {title}? Please round to two decimal places.",
                    f"Can you help calculate the median value of {title} for all the {plural} in this chart? Please round to two decimal places.",
                    f"Please compute the median value of {title} for all the {plural} in this chart. Please round to two decimal places.",
                    f"What is the median {title} among all the {plural} in this chart? Please round to two decimal places.",
                    f"For all the {plural} in this chart, what is the median of their {title}? Please round to two decimal places.",
                    f"Can you help calculate the median {title} for all the {plural} in this chart? Please round to two decimal places.",
                    f"Please compute the median {title} for all the {plural} in this chart. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": with_detail(f"First, I need to read the {title} of all the {plural} in this chart", readout),
                        "step_2": with_detail(f"Second, I need to calculate the median {title} of all the {plural}", walkthrough),
                    },
                ],
                "constraint": "none",
                "answer": answer,
                # Unlike sum/mean, the second step and the answer are always
                # masked with the indices reported by the median helper.
                "mask": {
                    "step_1": indices,
                    "step_2": middle_indices,
                    "answer": middle_indices,
                },
            }

        # ---- assemble in the fixed task order ---------------------------------
        return {
            "one_step__statistics__sum_x": sum_task(x_title, x_readout, x_total_work, x_total, x_indices),
            "one_step__statistics__sum_y": sum_task(y_title, y_readout, y_total_work, y_total, y_indices),
            "one_step__statistics__mean_x": mean_task(x_title, x_readout, x_mean_work, x_mean, x_indices),
            "one_step__statistics__mean_y": mean_task(y_title, y_readout, y_mean_work, y_mean, y_indices),
            "one_step__statistics__median_x": median_task(x_title, x_readout, x_median_work, x_median, x_indices, x_median_indices),
            "one_step__statistics__median_y": median_task(y_title, y_readout, y_median_work, y_median, y_indices, y_median_indices),
            "one_step__statistics__count": {
                "question": [
                    f"How many {plural} are included in this chart?",
                    f"What is the number of {plural} shown in this chart?",
                    f"Please help count the total number of {plural} plotted in this chart.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to count the number of {plural} in this chart.",
                    },
                ],
                "constraint": "none",
                # The count is taken from the x series.
                "answer": x_count,
                "mask": {
                    "step_1": x_indices,
                    "answer": x_indices,
                },
            },
        }


    ############################################################
    #                     Two-step Operator
    ############################################################

    def _two_step_statistics(self, chart_metadata: Dict, target_scatter_indices: List, constraint: str):
        """
        Build the medium QA pool: sum, mean, median and count over a subset of
        the plotted points.

        Args:
            chart_metadata: Chart description. Keys read here are ``x_label``,
                ``y_label``, ``scatter_category['plural']``, ``scatter_x_data``,
                ``scatter_y_data`` and ``scatter_labels``.
            target_scatter_indices: Indices into the chart's scatter arrays that
                select the subset. The argument is not modified; a sorted copy
                is used internally. It must be non-empty, because the mean
                divides by the number of selected points.
            constraint: Natural-language phrase describing the subset. It is
                spliced verbatim after the plural category name in every
                question and reasoning step, and stored under ``constraint``.

        Returns:
            Dict keyed by ``two_step__statistics__*`` task names. Each entry
            holds ``question`` (paraphrases), ``reasoning`` (step texts),
            ``constraint``, ``answer`` and ``mask`` (point indices attached to
            every reasoning step and to the answer).
        """
        # Work on a sorted copy: sorting in place would silently reorder the
        # list owned by the caller.
        target_scatter_indices = sorted(target_scatter_indices)
        selected_count = len(target_scatter_indices)

        x_axis_title = chart_metadata['x_label']
        y_axis_title = chart_metadata['y_label']
        scatter_category_plural = chart_metadata['scatter_category']['plural']

        target_x_data = [chart_metadata["scatter_x_data"][scatter_idx] for scatter_idx in target_scatter_indices]
        target_y_data = [chart_metadata["scatter_y_data"][scatter_idx] for scatter_idx in target_scatter_indices]
        scatter_labels = [chart_metadata["scatter_labels"][scatter_idx] for scatter_idx in target_scatter_indices]

        # Read: labels and values were gathered with the same indices, so they
        # line up one-to-one.
        read_x_reason = '\n'.join(f"* {label}: {value}" for label, value in zip(scatter_labels, target_x_data)).strip()
        read_y_reason = '\n'.join(f"* {label}: {value}" for label, value in zip(scatter_labels, target_y_data)).strip()
        read_scatter_reason = ', '.join(scatter_labels).strip()

        # Sum
        x_terms = '+'.join(str(value) for value in target_x_data)
        y_terms = '+'.join(str(value) for value in target_y_data)
        sum_x_answer = self._compute_data_sum(target_x_data)
        sum_x_reason = f"{x_terms} = {sum_x_answer}"
        sum_y_answer = self._compute_data_sum(target_y_data)
        sum_y_reason = f"{y_terms} = {sum_y_answer}"

        # Mean
        mean_x_answer = sum_x_answer / selected_count
        mean_x_reason = f"{x_terms}/{selected_count} = {sum_x_answer}/{selected_count} = {mean_x_answer}"
        mean_y_answer = sum_y_answer / selected_count
        mean_y_reason = f"{y_terms}/{selected_count} = {sum_y_answer}/{selected_count} = {mean_y_answer}"

        # Median
        # NOTE(review): the helper only sees the selected sub-list, so the
        # indices it returns are presumably positions within that sub-list
        # rather than chart-level indices like the other masks — confirm against
        # `_compute_data_median` and map through `target_scatter_indices` if so.
        median_x_value, median_x_indices = self._compute_data_median(target_x_data)
        median_y_value, median_y_indices = self._compute_data_median(target_y_data)

        def _median_reason(axis_title, values, median_value):
            # Odd length: a single middle value. Even length: show the two
            # middle values that get averaged.
            ordered = sorted(values)
            if len(values) % 2 == 1:
                return f"* Sort their {axis_title} values: {ordered}\n* The median is the middle value: {median_value}"
            mid1, mid2 = ordered[len(ordered)//2-1], ordered[len(ordered)//2]
            return f"* Sort their {axis_title} values: {ordered}\n* The median is the average of the two middle values: ({mid1} + {mid2})/2 = {median_value}"

        median_x_reason = _median_reason(x_axis_title, target_x_data, median_x_value)
        median_y_reason = _median_reason(y_axis_title, target_y_data, median_y_value)

        # Count
        count_answer = selected_count

        # Chart QA Pool
        medium_qa_pool = {
            "two_step__statistics__sum_x": {
                "question": [
                    f"What is the total {x_axis_title} for {scatter_category_plural} {constraint}?",
                    f"For the {scatter_category_plural} {constraint}, what is the sum of their {x_axis_title}?",
                    f"Can you help calculate the sum of {x_axis_title} for {scatter_category_plural} {constraint}?",
                    f"Please compute the sum of {x_axis_title} for {scatter_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their total {x_axis_title}" + (f":\n{sum_x_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": sum_x_answer,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "answer": target_scatter_indices,
                },
            },
            "two_step__statistics__sum_y": {
                "question": [
                    f"What is the total {y_axis_title} for the {scatter_category_plural} {constraint}?",
                    f"For the {scatter_category_plural} {constraint}, what is the sum of their {y_axis_title}?",
                    f"Can you help calculate the sum of {y_axis_title} for {scatter_category_plural} {constraint}?",
                    f"Please compute the sum of {y_axis_title} for all the {scatter_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their total {y_axis_title}" + (f":\n{sum_y_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": sum_y_answer,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "answer": target_scatter_indices,
                },
            },
            "two_step__statistics__mean_x": {
                "question": [
                    f"What is the mean {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is their mean {x_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the mean {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the mean {x_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the average {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is their average {x_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the average {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the average {x_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their average {x_axis_title}" + (f":\n{mean_x_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": mean_x_answer,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "answer": target_scatter_indices,
                },
            },
            "two_step__statistics__mean_y": {
                "question": [
                    f"What is the mean {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the mean {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the mean {y_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the average {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is their average {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the average {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the average {y_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their average {y_axis_title}" + (f":\n{mean_y_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": mean_y_answer,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "answer": target_scatter_indices,
                },
            },
            "two_step__statistics__median_x": {
                "question": [
                    f"What is the median value of {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is the median value of their {x_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median value of {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median value of {x_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the median of {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is the median of their {x_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median of {x_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median of {x_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their median {x_axis_title}" + (f":\n{median_x_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": median_x_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": median_x_indices,
                    "answer": median_x_indices,
                },
            },
            "two_step__statistics__median_y": {
                "question": [
                    f"What is the median value of {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is the median value of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median value of {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median value of {y_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the median of {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {scatter_category_plural} {constraint}, what is the median of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median of {y_axis_title} for {scatter_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median of {y_axis_title} for {scatter_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {scatter_category_plural} {constraint}" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate their median {y_axis_title}" + (f":\n{median_y_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": median_y_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": median_y_indices,
                    "answer": median_y_indices,
                },
            },
            "two_step__statistics__count": {
                "question": [
                    f"How many {scatter_category_plural} {constraint} are shown in this chart?",
                    f"What is the number of {scatter_category_plural} {constraint}?",
                    f"Please help count the number of {scatter_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to find all the {scatter_category_plural} {constraint}" + (f":\n{read_scatter_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to count the total number of these {scatter_category_plural}.",
                    },
                ],
                "constraint": constraint,
                "answer": count_answer,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "answer": target_scatter_indices,
                },
            },
        }

        return medium_qa_pool


    ############################################################
    #                     Multi-step Operator
    ############################################################

    def _multi_step_threshold(self, chart_metadata: Dict):
        """
        Threshold: above / below mean, differences
        """
        x_axis_title = chart_metadata['x_label']
        y_axis_title = chart_metadata['y_label']
        target_x_data = copy.deepcopy(chart_metadata["scatter_x_data"])
        target_y_data = copy.deepcopy(chart_metadata["scatter_y_data"])
        target_scatter_labels = copy.deepcopy(chart_metadata["scatter_labels"])
        target_scatter_indices = [iii for iii in range(len(target_scatter_labels))]
        scatter_category_singular = chart_metadata['scatter_category']['singular']
        scatter_category_plural = chart_metadata['scatter_category']['plural']

        # Sum
        x_value_sum = self._compute_data_sum(target_x_data)
        y_value_sum = self._compute_data_sum(target_y_data)

        # Mean
        x_value_mean = self._compute_data_mean(target_x_data)
        y_value_mean = self._compute_data_mean(target_y_data)

        # Above & below count
        ###### X
        x_above_mean_num, x_below_mean_num, x_scatter_idx = 0, 0, 0
        x_above_mean_indices, x_below_mean_indices = [], []
        x_above_mean_values, x_below_mean_values = [], []
        for vv in target_x_data:
            if vv > x_value_mean:
                x_above_mean_num += 1
                x_above_mean_indices.append(x_scatter_idx)
                x_above_mean_values.append(vv)
            elif vv < x_value_mean:
                x_below_mean_num += 1
                x_below_mean_indices.append(x_scatter_idx)
                x_below_mean_values.append(vv)
            x_scatter_idx += 1
        ###### Y
        y_above_mean_num, y_below_mean_num, y_scatter_idx = 0, 0, 0
        y_above_mean_indices, y_below_mean_indices = [], []
        y_above_mean_values, y_below_mean_values = [], []
        for vv in target_y_data:
            if vv > y_value_mean:
                y_above_mean_num += 1
                y_above_mean_indices.append(y_scatter_idx)
                y_above_mean_values.append(vv)
            elif vv < y_value_mean:
                y_below_mean_num += 1
                y_below_mean_indices.append(y_scatter_idx)
                y_below_mean_values.append(vv)
            y_scatter_idx += 1
        
        # Sum sublist
        x_above_mean_value_sum = self._compute_data_sum(x_above_mean_values)
        x_below_mean_value_sum = self._compute_data_sum(x_below_mean_values)
        y_above_mean_value_sum = self._compute_data_sum(y_above_mean_values)
        y_below_mean_value_sum = self._compute_data_sum(y_below_mean_values)

        # Mean sublist
        x_above_mean_value_avg = x_above_mean_value_sum/x_above_mean_num
        x_below_mean_value_avg = x_below_mean_value_sum/x_below_mean_num
        y_above_mean_value_avg = y_above_mean_value_sum/y_above_mean_num
        y_below_mean_value_avg = y_below_mean_value_sum/y_below_mean_num
        
        # Max/min among above-mean sublist
        x_max_above_mean_value = max(x_above_mean_values)
        x_min_above_mean_value = min(x_above_mean_values)
        x_max_above_mean_value_indices = self._find_indices_in_list(target_x_data, x_max_above_mean_value)
        x_min_above_mean_value_indices = self._find_indices_in_list(target_x_data, x_min_above_mean_value)
        x_max_above_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, x_max_above_mean_value_indices)
        x_min_above_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, x_min_above_mean_value_indices)

        y_max_above_mean_value = max(y_above_mean_values)
        y_min_above_mean_value = min(y_above_mean_values)
        y_max_above_mean_value_indices = self._find_indices_in_list(target_y_data, y_max_above_mean_value)
        y_min_above_mean_value_indices = self._find_indices_in_list(target_y_data, y_min_above_mean_value)
        y_max_above_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, y_max_above_mean_value_indices)
        y_min_above_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, y_min_above_mean_value_indices)

        # Max/min among below-mean sublist
        x_max_below_mean_value = max(x_below_mean_values)
        x_min_below_mean_value = min(x_below_mean_values)
        x_max_below_mean_value_indices = self._find_indices_in_list(target_x_data, x_max_below_mean_value)
        x_min_below_mean_value_indices = self._find_indices_in_list(target_x_data, x_min_below_mean_value)
        x_max_below_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, x_max_below_mean_value_indices)
        x_min_below_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, x_min_below_mean_value_indices)

        y_max_below_mean_value = max(y_below_mean_values)
        y_min_below_mean_value = min(y_below_mean_values)
        y_max_below_mean_value_indices = self._find_indices_in_list(target_y_data, y_max_below_mean_value)
        y_min_below_mean_value_indices = self._find_indices_in_list(target_y_data, y_min_below_mean_value)
        y_max_below_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, y_max_below_mean_value_indices)
        y_min_below_mean_label = self._convert_answer_idx_to_str(target_scatter_labels, y_min_below_mean_value_indices)

        # Difference between the sum of above-mean scatters and the sum of below-mean scatters
        x_sum_diff = x_above_mean_value_sum - x_below_mean_value_sum
        x_mean_diff = x_above_mean_value_avg - x_below_mean_value_avg
        y_sum_diff = y_above_mean_value_sum - y_below_mean_value_sum
        y_mean_diff = y_above_mean_value_avg - y_below_mean_value_avg
        
        # Reason
        read_x_reason = '\n'.join([f"* {target_scatter_labels[scatter_idx]}: {target_x_data[scatter_idx]}" for scatter_idx in range(len(target_x_data))]).strip()
        read_y_reason = '\n'.join([f"* {target_scatter_labels[scatter_idx]}: {target_y_data[scatter_idx]}" for scatter_idx in range(len(target_y_data))]).strip()
        read_scatter_reason = ', '.join(target_scatter_labels).strip()

        reason_x_sum = f"{'+'.join([str(nn) for nn in target_x_data])} = {x_value_sum}"
        reason_x_avg = f"{'+'.join([str(nn) for nn in target_x_data])}/{len(target_scatter_labels)} = {x_value_sum}/{len(target_scatter_labels)} = {x_value_mean}"
        reason_x_above_mean_value_sum = f"{'+'.join([str(nn) for nn in x_above_mean_values])} = {x_above_mean_value_sum}"
        reason_x_above_mean_value_avg = f"({'+'.join([str(nn) for nn in x_above_mean_values])})/{x_above_mean_num} = {x_above_mean_value_sum}/{x_above_mean_num} = {x_above_mean_value_avg}"
        reason_x_below_mean_value_sum = f"{'+'.join([str(nn) for nn in x_below_mean_values])} = {x_below_mean_value_sum}"
        reason_x_below_mean_value_avg = f"({'+'.join([str(nn) for nn in x_below_mean_values])})/{x_below_mean_num} = {x_below_mean_value_sum}/{x_below_mean_num} = {x_below_mean_value_avg}"        
        
        reason_y_sum = f"{'+'.join([str(nn) for nn in target_y_data])} = {y_value_sum}"
        reason_y_avg = f"{'+'.join([str(nn) for nn in target_y_data])}/{len(target_scatter_labels)} = {y_value_sum}/{len(target_scatter_labels)} = {y_value_mean}"
        reason_y_above_mean_value_sum = f"{'+'.join([str(nn) for nn in y_above_mean_values])} = {y_above_mean_value_sum}"
        reason_y_above_mean_value_avg = f"({'+'.join([str(nn) for nn in y_above_mean_values])})/{y_above_mean_num} = {y_above_mean_value_sum}/{y_above_mean_num} = {y_above_mean_value_avg}"
        reason_y_below_mean_value_sum = f"{'+'.join([str(nn) for nn in y_below_mean_values])} = {y_below_mean_value_sum}"
        reason_y_below_mean_value_avg = f"({'+'.join([str(nn) for nn in y_below_mean_values])})/{y_below_mean_num} = {y_below_mean_value_sum}/{y_below_mean_num} = {y_below_mean_value_avg}"
        
        

        # Chart QA Pool
        hard_qa_pool = {
            "multi_step__threshold__x__above_mean__max__value": {
                "question": [
                    f"What is the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the highest {x_axis_title}?",
                    f"What is the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the highest {x_axis_title}?",
                    f"What is the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the largest {x_axis_title}?",
                    f"What is the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the largest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_max_above_mean_label}: {x_max_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - max value among {scatter_category_plural} above mean",
                "answer": x_max_above_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_max_above_mean_value_indices,
                    "answer": x_max_above_mean_value_indices,
                },
            },
            "multi_step__threshold__y__above_mean__max__value": {
                "question": [
                    f"What is the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the highest {y_axis_title}?",
                    f"What is the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the highest {y_axis_title}?",
                    f"What is the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the largest {y_axis_title}?",
                    f"What is the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the largest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_max_above_mean_label}: {y_max_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - max value among {scatter_category_plural} above mean",
                "answer": y_max_above_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_max_above_mean_value_indices,
                    "answer": y_max_above_mean_value_indices,
                },
            },
            "multi_step__threshold__x__above_mean__min__value": {
                "question": [
                    f"What is the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the lowest {x_axis_title}?",
                    f"What is the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the lowest {x_axis_title}?",
                    f"What is the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the smallest {x_axis_title}?",
                    f"What is the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the smallest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_min_above_mean_label}: {x_min_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - min value among {scatter_category_plural} above mean",
                "answer": x_min_above_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_min_above_mean_value_indices,
                    "answer": x_min_above_mean_value_indices,
                },
            },
            "multi_step__threshold__y__above_mean__min__value": {
                "question": [
                    f"What is the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the lowest {y_axis_title}?",
                    f"What is the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the lowest {y_axis_title}?",
                    f"What is the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the smallest {y_axis_title}?",
                    f"What is the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the smallest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_min_above_mean_label}: {y_min_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - min value among {scatter_category_plural} above mean",
                "answer": y_min_above_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_min_above_mean_value_indices,
                    "answer": y_min_above_mean_value_indices,
                },
            },
            "multi_step__threshold__x__below_mean__max__value": {
                "question": [
                    f"What is the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is the highest {x_axis_title}?",
                    f"What is the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is the highest {x_axis_title}?",
                    f"What is the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is the largest {x_axis_title}?",
                    f"What is the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is the largest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_max_below_mean_label}: {x_max_below_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - max value among {scatter_category_plural} below mean",
                "answer": x_max_below_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_below_mean_indices,
                    "step_4": x_max_below_mean_value_indices,
                    "answer": x_max_below_mean_value_indices,
                },
            },
            "multi_step__threshold__y__below_mean__max__value": {
                "question": [
                    f"What is the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is the highest {y_axis_title}?",
                    f"What is the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is the highest {y_axis_title}?",
                    f"What is the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is the largest {y_axis_title}?",
                    f"What is the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is the largest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_max_below_mean_label}: {y_max_below_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - max value among {scatter_category_plural} below mean",
                "answer": y_max_below_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_below_mean_indices,
                    "step_4": y_max_below_mean_value_indices,
                    "answer": y_max_below_mean_value_indices,
                },
            },
            "multi_step__threshold__x__below_mean__min__value": {
                "question": [
                    f"What is the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is the lowest {x_axis_title}?",
                    f"What is the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is the lowest {x_axis_title}?",
                    f"What is the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is the smallest {x_axis_title}?",
                    f"What is the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is the smallest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_min_below_mean_label}: {x_min_below_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - min value among {scatter_category_plural} below mean",
                "answer": x_min_below_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_below_mean_indices,
                    "step_4": x_min_below_mean_value_indices,
                    "answer": x_min_below_mean_value_indices,
                },
            },
            "multi_step__threshold__y__below_mean__min__value": {
                "question": [
                    f"What is the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is the lowest {y_axis_title}?",
                    f"What is the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is the lowest {y_axis_title}?",
                    f"What is the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is the smallest {y_axis_title}?",
                    f"What is the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is the smallest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_min_below_mean_label}: {y_min_below_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - min value among {scatter_category_plural} below mean",
                "answer": y_min_below_mean_value,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_below_mean_indices,
                    "step_4": y_min_below_mean_value_indices,
                    "answer": y_min_below_mean_value_indices,
                },
            },
            "multi_step__threshold__x__above_mean__max__label": {
                "question": [
                    f"What is the label of the {scatter_category_singular} that has the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, whhat is the label of the {scatter_category_singular} that has the highest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the highest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the highest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, whhat is the label of the {scatter_category_singular} that has the largest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the largest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the largest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_max_above_mean_label}: {x_max_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - label of the max value among {scatter_category_plural} above mean",
                "answer": x_max_above_mean_label,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_max_above_mean_value_indices,
                    "answer": x_max_above_mean_value_indices,
                },
            },
            "multi_step__threshold__y__above_mean__max__label": {
                "question": [
                    f"What is the label of the {scatter_category_singular} that has the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, whhat is the label of the {scatter_category_singular} that has the highest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the highest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the highest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, whhat is the label of the {scatter_category_singular} that has the largest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the largest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the largest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the highest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_max_above_mean_label}: {y_max_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - label of the max value among {scatter_category_plural} above mean",
                "answer": y_max_above_mean_label,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_max_above_mean_value_indices,
                    "answer": y_max_above_mean_value_indices,
                },
            },
            "multi_step__threshold__x__above_mean__min__label": {
                "question": [
                    f"What is the label of the {scatter_category_singular} that has the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the lowest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the lowest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the lowest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the smallest {x_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the smallest {x_axis_title} among {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the smallest {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {x_axis_title} among these {scatter_category_plural}" + (f":\n{x_min_above_mean_label}: {x_min_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - min value among {scatter_category_plural} above mean",
                "answer": x_min_above_mean_label,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_min_above_mean_value_indices,
                    "answer": x_min_above_mean_value_indices,
                },
            },
            "multi_step__threshold__y__above_mean__min__label": {
                "question": [
                    f"What is the label of the {scatter_category_singular} that has the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the lowest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the lowest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the lowest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the smallest {y_axis_title}?",
                    f"What is the label of the {scatter_category_singular} that has the smallest {y_axis_title} among {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the label of the {scatter_category_singular} that has the smallest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to identify the {scatter_category_singular} with the lowest {y_axis_title} among these {scatter_category_plural}" + (f":\n{y_min_above_mean_label}: {y_min_above_mean_value}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - min value among {scatter_category_plural} above mean",
                "answer": y_min_above_mean_label,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_min_above_mean_value_indices,
                    "answer": y_min_above_mean_value_indices,
                },
            },
            "multi_step__threshold__x__above_mean__sum": {
                "question": [
                    f"What is the sum of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is the sum of their {x_axis_title}?",
                    f"What is the sum of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is the sum of their {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the total {x_axis_title} of these {scatter_category_plural}" + (f":\n{reason_x_above_mean_value_sum}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - sum of {scatter_category_plural} above mean",
                "answer": x_above_mean_value_sum,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_above_mean_indices if self.force_ground else [],
                    "answer": x_above_mean_indices,
                },
            },
            "multi_step__threshold__y__above_mean__sum": {
                "question": [
                    f"What is the sum of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is the sum of their {y_axis_title}?",
                    f"What is the sum of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is the sum of their {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these {scatter_category_plural}" + (f":\n{reason_y_above_mean_value_sum}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - sum of {scatter_category_plural} above mean",
                "answer": y_above_mean_value_sum,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_above_mean_indices if self.force_ground else [],
                    "answer": y_above_mean_indices,
                },
            },
            "multi_step__threshold__x__below_mean__sum": {
                "question": [
                    f"What is the sum of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is the sum of their {x_axis_title}?",
                    f"What is the sum of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is the sum of their {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the total {x_axis_title} of these {scatter_category_plural}" + (f":\n{reason_x_below_mean_value_sum}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - sum of {scatter_category_plural} below mean",
                "answer": x_below_mean_value_sum,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_below_mean_indices,
                    "step_4": x_below_mean_indices if self.force_ground else [],
                    "answer": x_below_mean_indices,
                },
            },
            "multi_step__threshold__y__below_mean__sum": {
                "question": [
                    f"What is the sum of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is the sum of their {y_axis_title}?",
                    f"What is the sum of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}?",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is the sum of their {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these {scatter_category_plural}" + (f":\n{reason_y_below_mean_value_sum}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - sum of {scatter_category_plural} below mean",
                "answer": y_below_mean_value_sum,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_below_mean_indices,
                    "step_4": y_below_mean_indices if self.force_ground else [],
                    "answer": y_below_mean_indices,
                },
            },
            "multi_step__threshold__x__above_mean__avg": {
                "question": [
                    f"What is the mean value of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} above the average {x_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the average {x_axis_title} of all {scatter_category_plural}, what is their mean {x_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} above the mean {x_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {x_axis_title} are above the mean {x_axis_title} of all {scatter_category_plural}, what is their average {x_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the mean {x_axis_title} of these {scatter_category_plural}" + (f":\n{reason_x_above_mean_value_avg}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - mean of {scatter_category_plural} above mean",
                "answer": x_above_mean_value_avg,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices,
                    "step_4": x_above_mean_indices if self.force_ground else [],
                    "answer": x_above_mean_indices,
                },
            },
            "multi_step__threshold__y__above_mean__avg": {
                "question": [
                    f"What is the mean value of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} above the average {y_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the average {y_axis_title} of all {scatter_category_plural}, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} above the mean {y_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {y_axis_title} are above the mean {y_axis_title} of all {scatter_category_plural}, what is their average {y_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_above_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the mean {y_axis_title} of these {scatter_category_plural}" + (f":\n{reason_y_above_mean_value_avg}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - mean of {scatter_category_plural} above mean",
                "answer": y_above_mean_value_avg,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices,
                    "step_4": y_above_mean_indices if self.force_ground else [],
                    "answer": y_above_mean_indices,
                },
            },
            "multi_step__threshold__x__below_mean__avg": {
                "question": [
                    f"What is the mean value of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} below the average {x_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the average {x_axis_title} of all {scatter_category_plural}, what is their mean {x_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {x_axis_title} for {scatter_category_plural} that have their {x_axis_title} below the mean {x_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {x_axis_title} are below the mean {x_axis_title} of all {scatter_category_plural}, what is their average {x_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in x_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the mean {x_axis_title} of these {scatter_category_plural}" + (f":\n{reason_x_below_mean_value_avg}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"X - mean of {scatter_category_plural} below mean",
                "answer": x_below_mean_value_avg,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_below_mean_indices,
                    "step_4": x_below_mean_indices if self.force_ground else [],
                    "answer": x_below_mean_indices,
                },
            },
            "multi_step__threshold__y__below_mean__avg": {
                "question": [
                    f"What is the mean value of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} below the average {y_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the average {y_axis_title} of all {scatter_category_plural}, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {y_axis_title} for {scatter_category_plural} that have their {y_axis_title} below the mean {y_axis_title} of all {scatter_category_plural}? Please round to two decimal places.",
                    f"Among {scatter_category_plural} whose {y_axis_title} are below the mean {y_axis_title} of all {scatter_category_plural}, what is their average {y_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}" + (f":\n{', '.join(target_scatter_labels[iii] for iii in y_below_mean_indices)}" if self.detailed_reasoning else "."),
                        "step_4": f"Fourth, I need to calculate the mean {y_axis_title} of these {scatter_category_plural}" + (f":\n{reason_y_below_mean_value_avg}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"mean of {scatter_category_plural} below mean",
                "answer": y_below_mean_value_avg,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_below_mean_indices,
                    "step_4": y_below_mean_indices if self.force_ground else [],
                    "answer": y_below_mean_indices,
                },
            },
            "multi_step__threshold__x__mean__sum_diff": {
                "question": [
                    f"What is the absolute difference between the total {x_axis_title} of {scatter_category_plural} above the average {x_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {x_axis_title} for {scatter_category_plural} above the average and those below the average {x_axis_title}?",
                    f"What is the absolute difference between the total {x_axis_title} of {scatter_category_plural} above the mean {x_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {x_axis_title} for {scatter_category_plural} above the mean {x_axis_title} and those below the mean {x_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean} and the second group of {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {x_axis_title} of these two groups of {scatter_category_plural} respectively" + (f":\n* Group 1 (above overall mean): {reason_x_above_mean_value_sum}\n* Group 2 (below overall mean): {reason_x_below_mean_value_sum}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I need to calculate the absolute difference between these two groups of {scatter_category_plural}" + (f":\n|{x_above_mean_value_sum} - {x_below_mean_value_sum}| = {x_sum_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - sum difference bewteen {scatter_category_plural} above and below mean",
                "answer": x_sum_diff,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices + x_below_mean_indices,
                    "step_4": x_above_mean_indices + x_below_mean_indices if self.force_ground else [],
                    "step_5": x_above_mean_indices + x_below_mean_indices if self.force_ground else [],
                    "answer": x_above_mean_indices + x_below_mean_indices,
                },
            },
            "multi_step__threshold__y__mean__sum_diff": {
                "question": [
                    f"What is the absolute difference between the total {y_axis_title} of {scatter_category_plural} above the average {y_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {y_axis_title} for {scatter_category_plural} above the average and those below the average {y_axis_title}?",
                    f"What is the absolute difference between the total {y_axis_title} of {scatter_category_plural} above the mean {y_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {y_axis_title} for {scatter_category_plural} above the mean {y_axis_title} and those below the mean {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean} and the second group of {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these two groups of {scatter_category_plural} respectively" + (f":\n* Group 1 (above overall mean): {reason_y_above_mean_value_sum}\n* Group 2 (below overall mean): {reason_y_below_mean_value_sum}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I need to calculate the absolute difference between these two groups of {scatter_category_plural}" + (f":\n|{y_above_mean_value_sum} - {y_below_mean_value_sum}| = {y_sum_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"Y - sum difference bewteen {scatter_category_plural} above and below mean",
                "answer": y_sum_diff,
                "mask": {
                    "step_1": target_scatter_indices if self.force_ground else [],
                    "step_2": target_scatter_indices,
                    "step_3": y_above_mean_indices + y_below_mean_indices,
                    "step_4": y_above_mean_indices + y_below_mean_indices if self.force_ground else [],
                    "step_5": y_above_mean_indices + y_below_mean_indices if self.force_ground else [],
                    "answer": y_above_mean_indices + y_below_mean_indices,
                },
            },
            "multi_step__threshold__x__mean__mean_diff": {
                "question": [
                    f"What is the absolute difference between the average {x_axis_title} of {scatter_category_plural} above and below the overall average? Please round to two decimal places.",
                    f"What is the absolute difference between the mean {x_axis_title} of {scatter_category_plural} above the overall average and those below it? Please round to two decimal places.",
                    f"What is the absolute difference between the average {x_axis_title} of {scatter_category_plural} whose values are above the overall average and those whose values are below it? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {x_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_x_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {x_axis_title} of all {scatter_category_plural}" + (f":\n{reason_x_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of {scatter_category_plural} whose {x_axis_title} is higher than {x_value_mean} and the second group of {scatter_category_plural} whose {x_axis_title} is lower than {x_value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {x_axis_title} of these two groups of {scatter_category_plural} respectively" + (f":\n* Group 1 (above overall mean): {reason_x_above_mean_value_avg}\n* Group 2 (below overall mean): {reason_x_below_mean_value_avg}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I need to calculate the absolute difference between these two groups of {scatter_category_plural}" + (f":\n|{x_above_mean_value_avg} - {x_below_mean_value_avg}| = {x_mean_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"mean difference bewteen {scatter_category_plural} above and below mean",
                "answer": x_mean_diff,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": x_above_mean_indices + x_below_mean_indices,
                    "step_4": x_above_mean_indices + x_below_mean_indices if self.force_ground else [],
                    "step_5": x_above_mean_indices + x_below_mean_indices if self.force_ground else [],
                    "answer": x_above_mean_indices + x_below_mean_indices,
                },
            },
            "multi_step__threshold__y__mean__mean_diff": {
                "question": [
                    f"What is the absolute difference between the average {y_axis_title} of {scatter_category_plural} above and below the overall average? Please round to two decimal places.",
                    f"What is the absolute difference between the mean {y_axis_title} of {scatter_category_plural} above the overall average and those below it? Please round to two decimal places.",
                    f"What is the absolute difference between the average {y_axis_title} of {scatter_category_plural} whose values are above the overall average and those whose values are below it? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each {scatter_category_singular} in this chart" + (f":\n{read_y_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} of all {scatter_category_plural}" + (f":\n{reason_y_avg}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of {scatter_category_plural} whose {y_axis_title} is higher than {y_value_mean} and the second group of {scatter_category_plural} whose {y_axis_title} is lower than {y_value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these two groups of {scatter_category_plural} respectively" + (f":\n* Group 1 (above overall mean): {reason_y_above_mean_value_avg}\n* Group 2 (below overall mean): {reason_y_below_mean_value_avg}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I need to calculate the absolute difference between these two groups of {scatter_category_plural}" + (f":\n|{y_above_mean_value_avg} - {y_below_mean_value_avg}| = {y_mean_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": f"mean difference bewteen {scatter_category_plural} above and below mean",
                "answer": y_mean_diff,
                "mask": {
                    "step_1": target_scatter_indices,
                    "step_2": target_scatter_indices if self.force_ground else [],
                    "step_3": y_above_mean_indices + y_below_mean_indices,
                    "step_4": y_above_mean_indices + y_below_mean_indices if self.force_ground else [],
                    "step_5": y_above_mean_indices + y_below_mean_indices if self.force_ground else [],
                    "answer": y_above_mean_indices + y_below_mean_indices,
                },
            },
        }

        return hard_qa_pool
