"""
Line chart curriculum generator
"""

import os
import json
import copy
from typing import List, Dict, Union, Tuple
from metachart import MetaGenerator


class LineChartGenerator(MetaGenerator):
    def __init__(self, args, chart_id):
        """
        Set up the generator state for a single chart.

        Args:
            args: Configuration object. It is forwarded to the parent
                initializer and read here for `chart_type`,
                `detailed_reasoning` and `force_ground`.
            chart_id: Identifier stored on the instance as `chart_id`.
        """
        super().__init__(args)

        # Chart identity.
        self.chart_id = chart_id
        self.chart_type = args.chart_type

        # Switches copied from args: verbose reasoning text and forced
        # grounding masks on computation-only steps.
        self.force_ground = args.force_ground
        self.detailed_reasoning = args.detailed_reasoning

        # Bookkeeping for the QA entries produced by this generator.
        self.qa_idx = 0
        self.round_num = 2
        self.all_qa_data_list = []
    


    ############################################################
    #   One-step Operator: h(list[Point] | list[v]) → list[v]
    ############################################################
    
    def _one_step_statistics(self, chart_metadata: Dict):
        """
        Statistics: sum, mean, median, count for line charts
        """
        x_axis_title = chart_metadata['x_label']
        y_axis_title = chart_metadata['y_label']
        line_category_singular = chart_metadata['line_category']['singular']
        line_category_plural = chart_metadata['line_category']['plural']
        
        # Flatten line data for overall statistics
        all_line_indices = []
        all_point_indices = []
        all_values = []
        
        for line_idx, line_values in enumerate(chart_metadata["line_data"]):
            for point_idx, value in enumerate(line_values):
                all_line_indices.append(line_idx)
                all_point_indices.append(point_idx)
                all_values.append(value)
        
        # Create mask indices as [line_idx, point_idx] pairs
        all_mask_indices = [[line_idx, point_idx] for line_idx, point_idx in zip(all_line_indices, all_point_indices)]
        
        # Read reason for all data points
        read_reason_lines = []
        for line_idx, line_values in enumerate(chart_metadata["line_data"]):
            line_label = chart_metadata["line_labels"][line_idx]
            for point_idx, value in enumerate(line_values):
                x_label = chart_metadata["x_labels"][point_idx]
                read_reason_lines.append(f"* {line_label} at {x_label}: {value}")
        read_reason = '\n'.join(read_reason_lines)
        
        # Sum
        sum_answer = self._compute_data_sum(all_values)
        sum_reason = f"{'+'.join([str(v) for v in all_values])} = {sum_answer}"
        
        # Mean
        mean_answer = sum_answer / len(all_values)
        mean_reason = f"{'+'.join([str(v) for v in all_values])}/{len(all_values)} = {sum_answer}/{len(all_values)} = {mean_answer}"
        
        # Median
        median_value, relative_median_indices = self._compute_data_median(all_values)
        median_mask_indices = [all_mask_indices[i] for i in relative_median_indices]

        # Median reasoning
        if len(all_values) % 2 == 1:
            median_reason = f"* Sort all {y_axis_title} values: {sorted(all_values)}\n* The median is the middle value: {median_value}"
        else:
            sorted_values = sorted(all_values)
            mid1, mid2 = sorted_values[len(sorted_values)//2-1], sorted_values[len(sorted_values)//2]
            median_reason = f"* Sort all {y_axis_title} values: {sorted_values}\n* The median is the average of the two middle values: ({mid1} + {mid2})/2 = {median_value}"
        
        # Count
        count_answer = len(all_values)
        
        easy_qa_pool = {
            "one_step__statistics__sum": {
                "question": [
                    f"What is the total {y_axis_title} for all data points in this line chart?",
                    f"For all data points in this line chart, what is the sum of their {y_axis_title}?",
                    f"Can you help calculate the sum of {y_axis_title} for all data points in this line chart?",
                    f"Please compute the sum of {y_axis_title} for all data points in this line chart.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to sum them up to calculate the total {y_axis_title} of all data points, which should be" + (f": {sum_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": sum_answer,
                "mask": {
                    "step_1": all_mask_indices,
                    "step_2": all_mask_indices if self.force_ground else [],
                    "answer": all_mask_indices,
                },
            },
            "one_step__statistics__mean": {
                "question": [
                    f"What is the mean {y_axis_title} of all data points in this line chart? Please round to two decimal places.",
                    f"For all data points in this line chart, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the mean {y_axis_title} of all data points in this line chart? Please round to two decimal places.",
                    f"Please compute the mean {y_axis_title} of all data points in this line chart. Please round to two decimal places.",
                    f"What is the average {y_axis_title} of all data points in this line chart? Please round to two decimal places.",
                    f"For all data points in this line chart, what is their average {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the average {y_axis_title} of all data points in this line chart? Please round to two decimal places.",
                    f"Please compute the average {y_axis_title} of all data points in this line chart. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the mean {y_axis_title} of all data points, which should be" + (f": {mean_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": mean_answer,
                "mask": {
                    "step_1": all_mask_indices,
                    "step_2": all_mask_indices if self.force_ground else [],
                    "answer": all_mask_indices,
                },
            },
            "one_step__statistics__median": {
                "question": [
                    f"What is the median value of {y_axis_title} among all data points in this line chart? Please round to two decimal places.",
                    f"For all data points in this line chart, what is the median value of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median value of {y_axis_title} for all data points in this line chart? Please round to two decimal places.",
                    f"Please compute the median value of {y_axis_title} for all data points in this line chart. Please round to two decimal places.",
                    f"What is the median {y_axis_title} among all data points in this line chart? Please round to two decimal places.",
                    f"For all data points in this line chart, what is the median of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median {y_axis_title} for all data points in this line chart? Please round to two decimal places.",
                    f"Please compute the median {y_axis_title} for all data points in this line chart. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of all data points in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the median {y_axis_title} of all data points" + (f":\n{median_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": median_value,
                "mask": {
                    "step_1": all_mask_indices,
                    "step_2": median_mask_indices if self.force_ground else [],
                    "answer": median_mask_indices,
                },
            },
            "one_step__statistics__count": {
                "question": [
                    f"How many data points are included in this line chart?",
                    f"What is the number of data points shown in this line chart?",
                    f"Please help count the total number of data points plotted in this line chart.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to count the number of data points in this chart.",
                    },
                ],
                "constraint": "none",
                "answer": count_answer,
                "mask": {
                    "step_1": all_mask_indices,
                    "answer": all_mask_indices,
                },
            },
            "one_step__statistics__count_lines": {
                "question": [
                    f"How many {line_category_plural} are shown in this line chart?",
                    f"What is the number of {line_category_plural} displayed in this line chart?",
                    f"Please help count the total number of {line_category_plural} plotted in this line chart.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to count the number of lines representing {line_category_plural} in this chart.",
                    },
                ],
                "constraint": "none",
                "answer": len(chart_metadata["line_data"]),
                "mask": {
                    "step_1": [[i, 0] for i in range(len(chart_metadata["line_data"]))],
                    "answer": [[i, 0] for i in range(len(chart_metadata["line_data"]))],
                },
            },
        }
        
        return easy_qa_pool



    ############################################################
    #                     Two-step Operator
    ############################################################

    def _two_step_statistics(self, chart_metadata: Dict, target_line_indices: List, constraint: str):
        """
        Statistics: sum, mean, median, count for filtered line data
        """
        target_line_indices.sort()
        x_axis_title = chart_metadata['x_label']
        y_axis_title = chart_metadata['y_label']
        line_category_singular = chart_metadata['line_category']['singular']
        line_category_plural = chart_metadata['line_category']['plural']
        
        # Collect data from target lines
        target_values = []
        target_mask_indices = []
        target_line_labels = []
        
        for line_idx in target_line_indices:
            line_label = chart_metadata["line_labels"][line_idx]
            target_line_labels.append(line_label)
            line_values = chart_metadata["line_data"][line_idx]
            
            for point_idx, value in enumerate(line_values):
                target_values.append(value)
                target_mask_indices.append([line_idx, point_idx])
        
        # Read reason for target data points
        read_reason_lines = []
        for line_idx in target_line_indices:
            line_label = chart_metadata["line_labels"][line_idx]
            line_values = chart_metadata["line_data"][line_idx]
            for point_idx, value in enumerate(line_values):
                x_label = chart_metadata["x_labels"][point_idx]
                read_reason_lines.append(f"* {line_label} at {x_label}: {value}")
        read_reason = '\n'.join(read_reason_lines)
        
        # Read constraint reason (which lines are selected)
        read_constraint_reason = ', '.join(target_line_labels)
        
        # Sum
        sum_answer = self._compute_data_sum(target_values)
        sum_reason = f"{'+'.join([str(v) for v in target_values])} = {sum_answer}"
        
        # Mean
        mean_answer = sum_answer / len(target_values)
        mean_reason = f"{'+'.join([str(v) for v in target_values])}/{len(target_values)} = {sum_answer}/{len(target_values)} = {mean_answer}"
        
        # Median
        median_value, relative_median_indices = self._compute_data_median(target_values)
        median_mask_indices = [target_mask_indices[i] for i in relative_median_indices]

        # Median reasoning
        if len(target_values) % 2 == 1:
            median_reason = f"* Sort their {y_axis_title} values: {sorted(target_values)}\n* The median is the middle value: {median_value}"
        else:
            sorted_values = sorted(target_values)
            mid1, mid2 = sorted_values[len(sorted_values)//2-1], sorted_values[len(sorted_values)//2]
            median_reason = f"* Sort their {y_axis_title} values: {sorted_values}\n* The median is the average of the two middle values: ({mid1} + {mid2})/2 = {median_value}"
        
        # Count
        count_answer = len(target_values)
        count_lines_answer = len(target_line_indices)
        
        medium_qa_pool = {
            "two_step__statistics__sum": {
                "question": [
                    f"What is the total {y_axis_title} for {line_category_plural} {constraint}?",
                    f"For {line_category_plural} {constraint}, what is the sum of their {y_axis_title}?",
                    f"Can you help calculate the sum of {y_axis_title} for {line_category_plural} {constraint}?",
                    f"Please compute the sum of {y_axis_title} for {line_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {line_category_plural} {constraint}" + (f": {read_constraint_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to read all their {y_axis_title}" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to calculate their total {y_axis_title}" + (f": {sum_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": sum_answer,
                "mask": {
                    "step_1": [[line_idx, 0] for line_idx in target_line_indices],
                    "step_2": target_mask_indices,
                    "step_3": target_mask_indices if self.force_ground else [],
                    "answer": target_mask_indices,
                },
            },
            "two_step__statistics__mean": {
                "question": [
                    f"What is the mean {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {line_category_plural} {constraint}, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the mean {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the mean {y_axis_title} for {line_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the average {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {line_category_plural} {constraint}, what is their average {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the average {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the average {y_axis_title} for {line_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {line_category_plural} {constraint}" + (f": {read_constraint_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to read all their {y_axis_title}" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to calculate their average {y_axis_title}" + (f": {mean_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": mean_answer,
                "mask": {
                    "step_1": [[line_idx, 0] for line_idx in target_line_indices],
                    "step_2": target_mask_indices,
                    "step_3": target_mask_indices if self.force_ground else [],
                    "answer": target_mask_indices,
                },
            },
            "two_step__statistics__median": {
                "question": [
                    f"What is the median value of {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {line_category_plural} {constraint}, what is the median value of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median value of {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median value of {y_axis_title} for {line_category_plural} {constraint}. Please round to two decimal places.",
                    f"What is the median of {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"For {line_category_plural} {constraint}, what is the median of their {y_axis_title}? Please round to two decimal places.",
                    f"Can you help calculate the median of {y_axis_title} for {line_category_plural} {constraint}? Please round to two decimal places.",
                    f"Please compute the median of {y_axis_title} for {line_category_plural} {constraint}. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {line_category_plural} {constraint}" + (f": {read_constraint_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to read all their '{y_axis_title}' values" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to calculate their median {y_axis_title}" + (f":\n{median_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": constraint,
                "answer": median_value,
                "mask": {
                    "step_1": [[line_idx, 0] for line_idx in target_line_indices],
                    "step_2": target_mask_indices,
                    "step_3": median_mask_indices if self.force_ground else [],
                    "answer": median_mask_indices,
                },
            },
            "two_step__statistics__count": {
                "question": [
                    f"How many data points are there for {line_category_plural} {constraint}?",
                    f"What is the number of data points for {line_category_plural} {constraint}?",
                    f"Please help count the total number of data points for {line_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify the {line_category_plural} {constraint}" + (f": {read_constraint_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to count the total number of their data points.",
                    },
                ],
                "constraint": constraint,
                "answer": count_answer,
                "mask": {
                    "step_1": [[line_idx, 0] for line_idx in target_line_indices],
                    "step_2": target_mask_indices,
                    "answer": target_mask_indices,
                },
            },
            "two_step__statistics__count_lines": {
                "question": [
                    f"How many {line_category_plural} {constraint} are shown in this chart?",
                    f"What is the number of {line_category_plural} {constraint}?",
                    f"Please help count the number of {line_category_plural} {constraint}.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to identify all the {line_category_plural} {constraint}" + (f": {read_constraint_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to count the total number of these {line_category_plural}.",
                    },
                ],
                "constraint": constraint,
                "answer": count_lines_answer,
                "mask": {
                    "step_1": [[line_idx, 0] for line_idx in target_line_indices],
                    "step_2": [[line_idx, 0] for line_idx in target_line_indices],
                    "answer": [[line_idx, 0] for line_idx in target_line_indices],
                },
            },
        }
        
        return medium_qa_pool



    ############################################################
    #                     Multi-step Operator
    ############################################################

    def _multi_step_threshold(self, chart_metadata: Dict):
        """
        Multi-step threshold: complex threshold analysis with multiple steps
        """
        x_axis_title = chart_metadata['x_label']
        y_axis_title = chart_metadata['y_label']
        line_category_singular = chart_metadata['line_category']['singular']
        line_category_plural = chart_metadata['line_category']['plural']
        
        # Flatten all data points for overall analysis
        all_values = []
        all_mask_indices = []
        
        for line_idx, line_values in enumerate(chart_metadata["line_data"]):
            for point_idx, value in enumerate(line_values):
                all_values.append(value)
                all_mask_indices.append([line_idx, point_idx])
        
        # Calculate overall statistics
        overall_mean = self._compute_data_mean(all_values)
        overall_sum = self._compute_data_sum(all_values)
        
        # Above & below mean analysis for data points
        above_mean_values, below_mean_values = [], []
        above_mean_mask_indices, below_mean_mask_indices = [], []
        
        for i, value in enumerate(all_values):
            if value > overall_mean:
                above_mean_values.append(value)
                above_mean_mask_indices.append(all_mask_indices[i])
            elif value < overall_mean:
                below_mean_values.append(value)
                below_mean_mask_indices.append(all_mask_indices[i])
        
        # Calculate statistics for subgroups
        above_mean_sum = self._compute_data_sum(above_mean_values) if above_mean_values else 0
        below_mean_sum = self._compute_data_sum(below_mean_values) if below_mean_values else 0
        above_mean_avg = above_mean_sum / len(above_mean_values) if above_mean_values else 0
        below_mean_avg = below_mean_sum / len(below_mean_values) if below_mean_values else 0
        
        # Find extrema among above/below mean groups
        if above_mean_values:
            max_above_mean_value = max(above_mean_values)
            min_above_mean_value = min(above_mean_values)
            max_above_mean_indices = self._find_indices_in_list(all_values, max_above_mean_value)
            min_above_mean_indices = self._find_indices_in_list(all_values, min_above_mean_value)
            max_above_mean_mask = [all_mask_indices[i] for i in max_above_mean_indices]
            min_above_mean_mask = [all_mask_indices[i] for i in min_above_mean_indices]
            
            # Generate location strings for extrema
            max_above_mean_locations = []
            min_above_mean_locations = []
            for idx in max_above_mean_indices:
                mask_idx = all_mask_indices[idx]
                line_label = chart_metadata["line_labels"][mask_idx[0]]
                x_label = chart_metadata["x_labels"][mask_idx[1]]
                max_above_mean_locations.append(f"{line_label} at {x_label}")
            for idx in min_above_mean_indices:
                mask_idx = all_mask_indices[idx]
                line_label = chart_metadata["line_labels"][mask_idx[0]]
                x_label = chart_metadata["x_labels"][mask_idx[1]]
                min_above_mean_locations.append(f"{line_label} at {x_label}")
            max_above_mean_locations_str = ", ".join(max_above_mean_locations)
            min_above_mean_locations_str = ", ".join(min_above_mean_locations)
        
        if below_mean_values:
            max_below_mean_value = max(below_mean_values)
            min_below_mean_value = min(below_mean_values)
            max_below_mean_indices = self._find_indices_in_list(all_values, max_below_mean_value)
            min_below_mean_indices = self._find_indices_in_list(all_values, min_below_mean_value)
            max_below_mean_mask = [all_mask_indices[i] for i in max_below_mean_indices]
            min_below_mean_mask = [all_mask_indices[i] for i in min_below_mean_indices]
            
            # Generate location strings for extrema
            max_below_mean_locations = []
            min_below_mean_locations = []
            for idx in max_below_mean_indices:
                mask_idx = all_mask_indices[idx]
                line_label = chart_metadata["line_labels"][mask_idx[0]]
                x_label = chart_metadata["x_labels"][mask_idx[1]]
                max_below_mean_locations.append(f"{line_label} at {x_label}")
            for idx in min_below_mean_indices:
                mask_idx = all_mask_indices[idx]
                line_label = chart_metadata["line_labels"][mask_idx[0]]
                x_label = chart_metadata["x_labels"][mask_idx[1]]
                min_below_mean_locations.append(f"{line_label} at {x_label}")
            max_below_mean_locations_str = ", ".join(max_below_mean_locations)
            min_below_mean_locations_str = ", ".join(min_below_mean_locations)
        
        # Differences
        sum_diff = abs(above_mean_sum - below_mean_sum)
        mean_diff = abs(above_mean_avg - below_mean_avg)
        
        # Read reasons
        read_reason_lines = []
        for line_idx, line_values in enumerate(chart_metadata["line_data"]):
            line_label = chart_metadata["line_labels"][line_idx]
            for point_idx, value in enumerate(line_values):
                x_label = chart_metadata["x_labels"][point_idx]
                read_reason_lines.append(f"* {line_label} at {x_label}: {value}")
        read_reason = '\n'.join(read_reason_lines)
        
        reason_overall_avg = f"{'+'.join([str(v) for v in all_values])}/{len(all_values)} = {overall_sum}/{len(all_values)} = {overall_mean:.2f}"
        reason_above_mean_sum = f"{'+'.join([str(v) for v in above_mean_values])} = {above_mean_sum}" if above_mean_values else "0"
        reason_below_mean_sum = f"{'+'.join([str(v) for v in below_mean_values])} = {below_mean_sum}" if below_mean_values else "0"
        reason_above_mean_avg = f"({'+'.join([str(v) for v in above_mean_values])})/{len(above_mean_values)} = {above_mean_sum}/{len(above_mean_values)} = {above_mean_avg:.2f}" if above_mean_values else "0"
        reason_below_mean_avg = f"({'+'.join([str(v) for v in below_mean_values])})/{len(below_mean_values)} = {below_mean_sum}/{len(below_mean_values)} = {below_mean_avg:.2f}" if below_mean_values else "0"
        
        hard_qa_pool = {}
        
        # Only add questions if we have data in both groups
        if above_mean_values:
            hard_qa_pool.update({
                "multi_step__threshold__above_mean__max__value": {
                    "question": [
                        f"What is the highest {y_axis_title} among data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is the highest {y_axis_title}?",
                        f"What is the highest {y_axis_title} among data points that have their {y_axis_title} above the overall mean {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall mean {y_axis_title}, what is the highest {y_axis_title}?",
                        f"What is the largest {y_axis_title} among data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is the largest {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is higher than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to identify the data point with the highest {y_axis_title} among these data points" + (f"\n{max_above_mean_locations_str}: {max_above_mean_value}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "max value among data points above overall mean",
                    "answer": max_above_mean_value,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices,
                        "step_4": max_above_mean_mask,
                        "answer": max_above_mean_mask,
                    },
                },
                "multi_step__threshold__above_mean__min__value": {
                    "question": [
                        f"What is the lowest {y_axis_title} among data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is the lowest {y_axis_title}?",
                        f"What is the lowest {y_axis_title} among data points that have their {y_axis_title} above the overall mean {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall mean {y_axis_title}, what is the lowest {y_axis_title}?",
                        f"What is the smallest {y_axis_title} among data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is the smallest {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is higher than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to identify the data point with the lowest {y_axis_title} among these data points" + (f":\n{min_above_mean_locations_str}: {min_above_mean_value}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "min value among data points above overall mean",
                    "answer": min_above_mean_value,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices,
                        "step_4": min_above_mean_mask,
                        "answer": min_above_mean_mask,
                    },
                },
                "multi_step__threshold__above_mean__max__location": {
                    "question": [
                        f"Where is the data point with the highest {y_axis_title} located among data points that have their {y_axis_title} above the overall average {y_axis_title}?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, where is the data point with the highest {y_axis_title} located?",
                        f"At which point does the maximum {y_axis_title} value occur among data points above the overall mean {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is higher than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to identify the data point with the highest {y_axis_title} among these data points" + (f":\n{max_above_mean_locations_str}: {max_above_mean_value}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "location of max value among data points above overall mean",
                    "answer": max_above_mean_locations_str,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices,
                        "step_4": max_above_mean_mask,
                        "answer": max_above_mean_mask,
                    },
                },
                "multi_step__threshold__above_mean__sum": {
                    "question": [
                        f"What is the sum of {y_axis_title} for data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is the sum of their {y_axis_title}?",
                        f"What is the total {y_axis_title} for data points that have their {y_axis_title} above the overall mean {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are above the overall mean {y_axis_title}, what is the total of their {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is higher than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these data points" + (f":\n{reason_above_mean_sum}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "sum of data points above overall mean",
                    "answer": above_mean_sum,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices,
                        "step_4": above_mean_mask_indices if self.force_ground else [],
                        "answer": above_mean_mask_indices,
                    },
                },
                "multi_step__threshold__above_mean__avg": {
                    "question": [
                        f"What is the average {y_axis_title} for data points that have their {y_axis_title} above the overall average {y_axis_title} of all data points? Please round to two decimal places.",
                        f"Among data points whose {y_axis_title} are above the overall average {y_axis_title}, what is their average {y_axis_title}? Please round to two decimal places.",
                        f"What is the mean {y_axis_title} for data points that have their {y_axis_title} above the overall mean {y_axis_title} of all data points? Please round to two decimal places.",
                        f"Among data points whose {y_axis_title} are above the overall mean {y_axis_title}, what is their mean {y_axis_title}? Please round to two decimal places.",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is higher than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to calculate the average {y_axis_title} of these data points" + (f":\n{reason_above_mean_avg}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "average of data points above overall mean",
                    "answer": above_mean_avg,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices,
                        "step_4": above_mean_mask_indices if self.force_ground else [],
                        "answer": above_mean_mask_indices,
                    },
                },
            })
        
        if below_mean_values:
            hard_qa_pool.update({
                "multi_step__threshold__below_mean__max__value": {
                    "question": [
                        f"What is the highest {y_axis_title} among data points that have their {y_axis_title} below the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall average {y_axis_title}, what is the highest {y_axis_title}?",
                        f"What is the highest {y_axis_title} among data points that have their {y_axis_title} below the overall mean {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall mean {y_axis_title}, what is the highest {y_axis_title}?",
                        f"What is the largest {y_axis_title} among data points that have their {y_axis_title} below the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall average {y_axis_title}, what is the largest {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is lower than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to identify the data point with the highest {y_axis_title} among these data points" + (f":\n{max_below_mean_locations_str}: {max_below_mean_value}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "max value among data points below overall mean",
                    "answer": max_below_mean_value,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": below_mean_mask_indices,
                        "step_4": max_below_mean_mask,
                        "answer": max_below_mean_mask,
                    },
                },
                "multi_step__threshold__below_mean__min__value": {
                    "question": [
                        f"What is the lowest {y_axis_title} among data points that have their {y_axis_title} below the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall average {y_axis_title}, what is the lowest {y_axis_title}?",
                        f"What is the lowest {y_axis_title} among data points that have their {y_axis_title} below the overall mean {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall mean {y_axis_title}, what is the lowest {y_axis_title}?",
                        f"What is the smallest {y_axis_title} among data points that have their {y_axis_title} below the overall average {y_axis_title} of all data points?",
                        f"Among data points whose {y_axis_title} are below the overall average {y_axis_title}, what is the smallest {y_axis_title}?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find all data points whose {y_axis_title} is lower than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to identify the data point with the lowest {y_axis_title} among these data points" + (f":\n{min_below_mean_locations_str}: {min_below_mean_value}" if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "min value among data points below overall mean",
                    "answer": min_below_mean_value,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": below_mean_mask_indices,
                        "step_4": min_below_mean_mask,
                        "answer": min_below_mean_mask,
                    },
                },
            })
        
        if above_mean_values and below_mean_values:
            hard_qa_pool.update({
                "multi_step__threshold__sum_diff": {
                    "question": [
                        f"What is the absolute difference between the total {y_axis_title} of data points above the overall average and those below it?",
                        f"What is the absolute value of the difference between the total {y_axis_title} for data points above the overall average and those below the overall average?",
                        f"What is the absolute difference between the total {y_axis_title} of data points above the overall mean and those below it?",
                        f"What is the absolute value of the difference between the total {y_axis_title} for data points above the overall mean and those below the overall mean?",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find the first group of data points whose {y_axis_title} is higher than {overall_mean:.2f} and the second group whose {y_axis_title} is lower than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these two groups respectively" + (f":\n* Group 1 (above overall mean): {reason_above_mean_sum}\n* Group 2 (below overall mean): {reason_below_mean_sum}" if self.detailed_reasoning else "."),
                            "step_5": f"Fifth, I need to calculate the absolute difference between these two groups" + (f":\n|{above_mean_sum} - {below_mean_sum}| = {sum_diff}." if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "sum difference between data points above and below overall mean",
                    "answer": sum_diff,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices + below_mean_mask_indices,
                        "step_4": (above_mean_mask_indices + below_mean_mask_indices) if self.force_ground else [],
                        "step_5": (above_mean_mask_indices + below_mean_mask_indices) if self.force_ground else [],
                        "answer": above_mean_mask_indices + below_mean_mask_indices,
                    },
                },
                "multi_step__threshold__mean_diff": {
                    "question": [
                        f"What is the absolute difference between the average {y_axis_title} of data points above and below the overall average? Please round to two decimal places.",
                        f"What is the absolute difference between the mean {y_axis_title} of data points above the overall average and those below it? Please round to two decimal places.",
                        f"What is the absolute difference between the average {y_axis_title} of data points whose values are above the overall average and those whose values are below it? Please round to two decimal places.",
                    ],
                    "reasoning": [
                        {
                            "step_1": f"First, I need to read the {y_axis_title} of each data point in this chart" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                            "step_2": f"Second, I need to compute the overall average {y_axis_title} of all data points" + (f":\n{reason_overall_avg}" if self.detailed_reasoning else "."),
                            "step_3": f"Third, I need to find the first group of data points whose {y_axis_title} is higher than {overall_mean:.2f} and the second group whose {y_axis_title} is lower than {overall_mean:.2f}.",
                            "step_4": f"Fourth, I need to calculate the average {y_axis_title} of these two groups respectively" + (f":\n* Group 1 (above overall mean): {reason_above_mean_avg}\n* Group 2 (below overall mean): {reason_below_mean_avg}" if self.detailed_reasoning else "."),
                            "step_5": f"Fifth, I need to calculate the absolute difference between these two groups" + (f":\n|{above_mean_avg:.2f} - {below_mean_avg:.2f}| = {mean_diff:.2f}." if self.detailed_reasoning else "."),
                        },
                    ],
                    "constraint": "mean difference between data points above and below overall mean",
                    "answer": mean_diff,
                    "mask": {
                        "step_1": all_mask_indices,
                        "step_2": all_mask_indices if self.force_ground else [],
                        "step_3": above_mean_mask_indices + below_mean_mask_indices,
                        "step_4": (above_mean_mask_indices + below_mean_mask_indices) if self.force_ground else [],
                        "step_5": (above_mean_mask_indices + below_mean_mask_indices) if self.force_ground else [],
                        "answer": above_mean_mask_indices + below_mean_mask_indices,
                    },
                },
            })
        
        return hard_qa_pool
