"""
Histogram curriculum generator
"""

import os
import json
import copy
import numpy as np
from typing import List, Dict
from metachart import MetaGenerator


class HistogramGenerator(MetaGenerator):
    def __init__(self, args, chart_id):
        """
        Set up a histogram QA generator for a single chart.

        Args:
            args: Run configuration. It is forwarded to ``MetaGenerator.__init__``
                and read here for ``chart_type``, ``detailed_reasoning`` and
                ``force_ground``.
            chart_id: Identifier of the chart, stored as ``self.chart_id``.
        """
        super().__init__(args)

        # Settings taken from the run configuration.
        self.args = args
        self.chart_type = args.chart_type
        # When truthy, reasoning steps embed the worked values instead of ending in ".".
        self.detailed_reasoning = args.detailed_reasoning
        # When truthy, calculation steps keep a bin mask instead of an empty one.
        self.force_ground = args.force_ground

        # Per-chart state.
        self.chart_id = chart_id
        self.all_qa_data_list = []
        # NOTE(review): meaning of round_num / qa_idx is not visible in this part of
        # the file; presumably a rounding precision and a running QA counter. Confirm.
        self.round_num, self.qa_idx = 2, 0
    


    ############################################################
    #   One-step Operator: h(list[Bar] | list[v]) → list[v]
    ############################################################

    def _one_step_statistics(self, chart_metadata: Dict):
        """
        Build the one-step statistics QA pool (sum, mean, median, count) over all bins.

        Args:
            chart_metadata: Chart description. This method reads the keys
                "y_label", "histogram_data" and "bin_edges".

        Returns:
            Dict keyed by QA type ("one_step__statistics__sum", "...__mean",
            "...__median", "...__count"). Every entry holds the question variants,
            the reasoning steps, the constraint ("none"), the answer, and the
            bin-index masks for each step and for the answer.

        Raises:
            KeyError: if one of the metadata keys above is missing.
            ZeroDivisionError: if the frequency list is empty, since the mean
                divides by the number of bins.
        """
        y_axis_title = chart_metadata["y_label"]

        # Extract histogram-specific data
        histogram_data = chart_metadata["histogram_data"]
        bin_edges = chart_metadata["bin_edges"]

        # Per-bin frequencies are the values every statistic below is computed on
        bin_frequencies = self._calculate_bin_frequencies(histogram_data, bin_edges)
        bin_labels = self._create_bin_labels(bin_edges)
        bin_count = len(bin_frequencies)

        # Bullet list "label: frequency" used by the detailed "read" step
        read_reason = "\n".join(
            f"* {bin_labels[ii]}: {freq}" for ii, freq in enumerate(bin_frequencies)
        )

        # Mask covering every bin (indices of all bins)
        bin_list = list(range(bin_count))

        # Sum (total frequency across all bins)
        sum_answer = self._compute_data_sum(bin_frequencies)
        frequency_terms = "+".join(str(freq) for freq in bin_frequencies)
        reason_sum = f"{frequency_terms} = {sum_answer}"

        # Mean (average frequency per bin).
        # The summed terms are parenthesised so the written expression really is
        # (a+b+...)/n; without them it reads a+b+(c/n). This also matches the
        # two-step variant of this reasoning string.
        # NOTE(review): the questions ask for two decimal places but the answer is
        # stored unrounded here; presumably rounded downstream. Confirm.
        mean_answer = sum_answer / bin_count
        reason_mean = f"({frequency_terms})/{bin_count} = {sum_answer}/{bin_count} = {mean_answer}"

        # Median (median frequency) and the bin indices it is read from
        median_value, median_indices = self._compute_data_median(bin_frequencies)

        # Median reasoning: odd count -> single middle value, even count -> mean of two
        sorted_frequencies = sorted(bin_frequencies)
        if bin_count % 2 == 1:
            median_reason = f"* Sort bin frequencies: {sorted_frequencies}\n* The median is the middle value: {median_value}"
        else:
            mid1 = sorted_frequencies[bin_count // 2 - 1]
            mid2 = sorted_frequencies[bin_count // 2]
            median_reason = f"* Sort bin frequencies: {sorted_frequencies}\n* The median is the average of the two middle values: ({mid1} + {mid2})/2 = {median_value}"

        # Count: only bins whose frequency is greater than zero are counted.
        # NOTE(review): the questions ask how many bins are "shown"; confirm that
        # excluding empty bins is the intended reading.
        count_answer = sum(1 for freq in bin_frequencies if freq > 0)

        # Chart QA Pool
        easy_qa_pool = {
            "one_step__statistics__sum": {
                "question": [
                    f"What is the total {y_axis_title} across all bins in this histogram?",
                    f"What is the sum of {y_axis_title} for all bins in this histogram?",
                    f"Can you help calculate the total {y_axis_title} across all bins in this histogram?",
                    f"Please compute the sum of {y_axis_title} for all bins in this histogram.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the total {y_axis_title} across all bins" + (f":\n{reason_sum}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": sum_answer,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "answer": bin_list,
                },
            },
            "one_step__statistics__mean": {
                "question": [
                    f"What is the mean {y_axis_title} across all bins in this histogram? Please round to two decimal places.",
                    f"What is the average {y_axis_title} per bin in this histogram? Please round to two decimal places.",
                    f"Can you help calculate the mean {y_axis_title} across all bins in this histogram? Please round to two decimal places.",
                    f"Please compute the average {y_axis_title} per bin in this histogram. Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the mean {y_axis_title} across all bins" + (f":\n{reason_mean}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": mean_answer,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "answer": bin_list,
                },
            },
            "one_step__statistics__median": {
                "question": [
                    f"What is the median {y_axis_title} across all bins in this histogram?",
                    "What is the median frequency value among all bins in this histogram?",
                    f"Can you help calculate the median {y_axis_title} across all bins in this histogram?",
                    "Please compute the median frequency value among all bins in this histogram.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of all bins in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the median {y_axis_title} across all bins" + (f":\n{median_reason}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "none",
                "answer": median_value,
                "mask": {
                    "step_1": bin_list,
                    "step_2": median_indices,
                    "answer": median_indices,
                },
            },
            "one_step__statistics__count": {
                "question": [
                    "How many bins are shown in this histogram?",
                    "What is the number of bins in this histogram?",
                    "Please help count the total number of bins in this histogram.",
                    "How many intervals are displayed in this histogram?",
                ],
                "reasoning": [
                    {
                        "step_1": "First, I need to count the number of bins in this histogram.",
                    },
                ],
                "constraint": "none",
                "answer": count_answer,
                "mask": {
                    "step_1": bin_list,
                    "answer": bin_list,
                },
            },
        }

        return easy_qa_pool

    
    ############################################################
    #                     Two-step Operator
    ############################################################

    def _two_step_statistics(self, chart_metadata: Dict, histogram_list: List, constraint: str):
        """
        Build the two-step statistics QA pool (sum, mean, median, count) for a subset of bins.

        Args:
            chart_metadata: Chart description. This method reads the keys
                "x_label", "y_label", "histogram_data" and "bin_edges".
            histogram_list: Indices of the bins the statistics are restricted to.
            constraint: Text describing the selection; it is inserted verbatim
                after the word "bins" in the questions and stored on each entry.

        Returns:
            Dict keyed by QA type ("two_step__statistics__sum", "...__mean",
            "...__median", "...__count"). Every entry holds the question variants,
            the reasoning steps, the constraint, the answer, and the bin-index masks.
        """
        # "x_label" is not used below; the lookup is kept so that a missing key
        # still raises KeyError at this point.
        _ = chart_metadata["x_label"]
        y_title = chart_metadata["y_label"]

        # Frequencies and labels of every bin, then narrowed to the requested indices
        raw_values = chart_metadata["histogram_data"]
        edges = chart_metadata["bin_edges"]
        all_freqs = self._calculate_bin_frequencies(raw_values, edges)
        all_labels = self._create_bin_labels(edges)
        selected_freqs = [all_freqs[idx] for idx in histogram_list]
        selected_labels = [all_labels[idx] for idx in histogram_list]

        # Bullet list "label: frequency" for the detailed "identify" step
        listing = "\n".join(
            f"* {label}: {freq}" for label, freq in zip(selected_labels, selected_freqs)
        )

        # Sum
        sum_answer = self._compute_data_sum(selected_freqs)
        plus_expr = "+".join(map(str, selected_freqs))
        reason_sum = f"{plus_expr} = {sum_answer}"

        # Mean
        n_selected = len(histogram_list)
        mean_answer = sum_answer / n_selected
        reason_mean = f"({plus_expr})/{n_selected} = {sum_answer}/{n_selected} = {mean_answer}"

        # Median: the helper returns positions within the selection, which are
        # translated back to chart-level bin indices for the masks
        median_value, local_positions = self._compute_data_median(selected_freqs)
        median_indices = [histogram_list[pos] for pos in local_positions]

        # Median reasoning
        ordered = sorted(selected_freqs)
        if len(selected_freqs) % 2 == 0:
            half = len(ordered) // 2
            lower_mid, upper_mid = ordered[half - 1], ordered[half]
            median_tail = f"* The median is the average of the two middle values: ({lower_mid} + {upper_mid})/2 = {median_value}"
        else:
            median_tail = f"* The median is the middle value: {median_value}"
        median_reason = f"* Sort their frequencies: {ordered}\n" + median_tail

        # Count: only selected bins with a frequency above zero
        count_answer = sum(1 for freq in selected_freqs if freq > 0)

        def detail(text):
            # Worked values are appended only in detailed mode; otherwise the step ends with "."
            return f":\n{text}" if self.detailed_reasoning else "."

        def grounded():
            # A fresh empty list per call, so separate entries never share one
            return histogram_list if self.force_ground else []

        def entry(questions, steps, answer, step_2_mask, answer_mask):
            # Assemble one pool entry with the common key layout
            return {
                "question": questions,
                "reasoning": [steps],
                "constraint": constraint,
                "answer": answer,
                "mask": {
                    "step_1": histogram_list,
                    "step_2": step_2_mask,
                    "answer": answer_mask,
                },
            }

        identify_step = f"First, I need to identify the bins {constraint}" + detail(listing)

        # Chart QA Pool
        medium_qa_pool = {}

        medium_qa_pool["two_step__statistics__sum"] = entry(
            [
                f"What is the sum of {y_title} for bins {constraint}?",
                f"For the bins {constraint}, what is the sum of their {y_title}?",
                f"Can you help calculate the sum of {y_title} for bins {constraint}?",
                f"Please compute the sum of {y_title} for bins {constraint}.",
            ],
            {
                "step_1": identify_step,
                "step_2": f"Second, I need to calculate their total {y_title}" + detail(reason_sum),
            },
            sum_answer,
            grounded(),
            histogram_list,
        )

        medium_qa_pool["two_step__statistics__mean"] = entry(
            [
                f"What is the mean {y_title} for bins {constraint}? Please round to two decimal places.",
                f"For the bins {constraint}, what is their mean {y_title}? Please round to two decimal places.",
                f"Can you help calculate the mean {y_title} for bins {constraint}? Please round to two decimal places.",
                f"Please compute the mean {y_title} for bins {constraint}. Please round to two decimal places.",
                f"What is the average {y_title} for bins {constraint}? Please round to two decimal places.",
                f"For the bins {constraint}, what is their average {y_title}? Please round to two decimal places.",
            ],
            {
                "step_1": identify_step,
                "step_2": f"Second, I need to calculate their average {y_title}" + detail(reason_mean),
            },
            mean_answer,
            grounded(),
            histogram_list,
        )

        medium_qa_pool["two_step__statistics__median"] = entry(
            [
                f"What is the median {y_title} for bins {constraint}?",
                f"For the bins {constraint}, what is the median of their {y_title}?",
                f"Can you help calculate the median {y_title} for bins {constraint}?",
                f"Please compute the median {y_title} for bins {constraint}.",
            ],
            {
                "step_1": identify_step,
                "step_2": f"Second, I need to calculate their median {y_title}" + detail(median_reason),
            },
            median_value,
            median_indices,
            median_indices,
        )

        medium_qa_pool["two_step__statistics__count"] = entry(
            [
                f"How many bins {constraint} are shown in this histogram?",
                f"What is the number of bins {constraint}?",
                f"Please help count the number of bins {constraint}.",
            ],
            {
                "step_1": f"First, I need to find all the bins {constraint}" + detail(listing),
                "step_2": "Second, I need to count the total number of these bins.",
            },
            count_answer,
            grounded(),
            histogram_list,
        )

        return medium_qa_pool

    

    ############################################################
    #                     Multi-step Operator
    ############################################################

    def _multi_step_threshold(self, chart_metadata: Dict):
        """
        Threshold: above / below mean, differences
        """
        x_axis_title = chart_metadata["x_label"]
        y_axis_title = chart_metadata["y_label"]
        
        # Extract histogram-specific data
        histogram_data = chart_metadata["histogram_data"]
        bin_edges = chart_metadata["bin_edges"]
        bin_frequencies = self._calculate_bin_frequencies(histogram_data, bin_edges)
        bin_labels = self._create_bin_labels(bin_edges)
        bin_list = [i for i in range(len(bin_frequencies))]
        
        # Read reason
        read_reason = "\n".join([f"* {bin_labels[ii]}: {bin_frequencies[ii]}" for ii in range(len(bin_frequencies))])
        
        # Statistics for all bins
        value_sum = self._compute_data_sum(bin_frequencies)
        value_mean = self._compute_data_mean(bin_frequencies)
        
        # Separate bins into above-mean and below-mean groups
        above_num, below_num, bin_idx = 0, 0, 0
        above_bin_idx, below_bin_idx = [], []
        above_bin_data, below_bin_data = [], []
        for freq in bin_frequencies:
            if freq > value_mean:
                above_num += 1
                above_bin_idx.append(bin_idx)
                above_bin_data.append(freq)
            elif freq < value_mean:
                below_num += 1
                below_bin_idx.append(bin_idx)
                below_bin_data.append(freq)
            bin_idx += 1

        # Statistics for subgroups
        above_sum = self._compute_data_sum(above_bin_data)
        below_sum = self._compute_data_sum(below_bin_data)
        above_sum_str = f"{'+'.join([str(n) for n in above_bin_data])} = {above_sum}"
        below_sum_str = f"{'+'.join([str(n) for n in below_bin_data])} = {below_sum}"

        above_mean = above_sum / above_num
        below_mean = below_sum / below_num
        above_mean_str = f"({'+'.join([str(n) for n in above_bin_data])})/{above_num} = {above_sum}/{above_num} = {above_mean}"
        below_mean_str = f"({'+'.join([str(n) for n in below_bin_data])})/{below_num} = {below_sum}/{below_num} = {below_mean}"

        # Extrema within subgroups
        max_above_mean_value = max(above_bin_data)
        min_above_mean_value = min(above_bin_data)
        max_above_value_indices = self._find_indices_in_list(bin_frequencies, max_above_mean_value)
        min_above_value_indices = self._find_indices_in_list(bin_frequencies, min_above_mean_value)
        # max_above_mean_label = self._convert_answer_idx_to_str(bin_labels, max_above_value_indices)
        # min_above_mean_label = self._convert_answer_idx_to_str(bin_labels, min_above_value_indices)
        max_above_mean_label = [bin_labels[i] for i in max_above_value_indices]
        min_above_mean_label = [bin_labels[i] for i in min_above_value_indices]

        max_below_mean_value = max(below_bin_data)
        min_below_mean_value = min(below_bin_data)
        max_below_value_indices = self._find_indices_in_list(bin_frequencies, max_below_mean_value)
        min_below_value_indices = self._find_indices_in_list(bin_frequencies, min_below_mean_value)
        # max_below_mean_label = self._convert_answer_idx_to_str(bin_labels, max_below_value_indices)
        # min_below_mean_label = self._convert_answer_idx_to_str(bin_labels, min_below_value_indices)
        max_below_mean_label = [bin_labels[i] for i in max_below_value_indices]
        min_below_mean_label = [bin_labels[i] for i in min_below_value_indices]

        # Differences between groups
        sum_diff = abs(above_sum - below_sum)
        mean_diff = abs(above_mean - below_mean)

        # Chart QA Pool
        hard_qa_pool = {
            "multi_step__threshold__above_mean__max__value": {
                "question": [
                    f"What is the highest {y_axis_title} among bins that have their {y_axis_title} above the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, what is the highest {y_axis_title}?",
                    f"What is the highest {y_axis_title} among bins that have their {y_axis_title} above the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, what is the highest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the highest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "max value among bins above mean",
                "answer": max_above_mean_value,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": max_above_value_indices,
                    "answer": max_above_value_indices,
                },
            },
            "multi_step__threshold__above_mean__min__value": {
                "question": [
                    f"What is the lowest {y_axis_title} among bins that have their {y_axis_title} above the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, what is the lowest {y_axis_title}?",
                    f"What is the lowest {y_axis_title} among bins that have their {y_axis_title} above the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, what is the lowest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the lowest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "min value among bins above mean",
                "answer": min_above_mean_value,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": min_above_value_indices,
                    "answer": min_above_value_indices,
                },
            },
            "multi_step__threshold__above_mean__max__label": {
                "question": [
                    f"Which bin has the highest {y_axis_title} among bins that have their {y_axis_title} above the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, which bin has the highest {y_axis_title}?",
                    f"Which bin has the highest {y_axis_title} among bins that have their {y_axis_title} above the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, which bin has the highest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the highest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "label of the max value among bins above mean",
                "answer": max_above_mean_label,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": max_above_value_indices,
                    "answer": max_above_value_indices,
                },
            },
            "multi_step__threshold__above_mean__min__label": {
                "question": [
                    f"Which bin has the lowest {y_axis_title} among bins that have their {y_axis_title} above the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, which bin has the lowest {y_axis_title}?",
                    f"Which bin has the lowest {y_axis_title} among bins that have their {y_axis_title} above the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, which bin has the lowest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the lowest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "label of the min value among bins above mean",
                "answer": min_above_mean_label,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": min_above_value_indices,
                    "answer": min_above_value_indices,
                },
            },
            "multi_step__threshold__below_mean__max__value": {
                "question": [
                    f"What is the highest {y_axis_title} among bins that have their {y_axis_title} below the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, what is the highest {y_axis_title}?",
                    f"What is the highest {y_axis_title} among bins that have their {y_axis_title} below the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, what is the highest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the highest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "max value among bins below mean",
                "answer": max_below_mean_value,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": max_below_value_indices,
                    "answer": max_below_value_indices,
                },
            },
            "multi_step__threshold__below_mean__min__value": {
                "question": [
                    f"What is the lowest {y_axis_title} among bins that have their {y_axis_title} below the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, what is the lowest {y_axis_title}?",
                    f"What is the lowest {y_axis_title} among bins that have their {y_axis_title} below the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, what is the lowest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the lowest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "min value among bins below mean",
                "answer": min_below_mean_value,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": min_below_value_indices,
                    "answer": min_below_value_indices,
                },
            },
            "multi_step__threshold__below_mean__max__label": {
                "question": [
                    f"Which bin has the highest {y_axis_title} among bins that have their {y_axis_title} below the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, which bin has the highest {y_axis_title}?",
                    f"Which bin has the highest {y_axis_title} among bins that have their {y_axis_title} below the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, which bin has the highest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the highest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "label of the max value among bins below mean",
                "answer": max_below_mean_label,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": max_below_value_indices,
                    "answer": max_below_value_indices,
                },
            },
            "multi_step__threshold__below_mean__min__label": {
                "question": [
                    f"Which bin has the lowest {y_axis_title} among bins that have their {y_axis_title} below the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, which bin has the lowest {y_axis_title}?",
                    f"Which bin has the lowest {y_axis_title} among bins that have their {y_axis_title} below the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, which bin has the lowest {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to compute the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to identify the bin with the lowest {y_axis_title} among these bins.",
                    },
                ],
                "constraint": "label of the min value among bins below mean",
                "answer": min_below_mean_label,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": min_below_value_indices,
                    "answer": min_below_value_indices,
                },
            },
            "multi_step__threshold__above_mean__sum": {
                "question": [
                    f"What is the sum of {y_axis_title} for bins that have their {y_axis_title} above the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, what is the sum of their {y_axis_title}?",
                    f"What is the sum of {y_axis_title} for bins that have their {y_axis_title} above the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, what is the sum of their {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these bins" + (f":\n{above_sum_str}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "sum of bins above mean",
                "answer": above_sum,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": above_bin_idx if self.force_ground else [],
                    "answer": above_bin_idx,
                },
            },
            "multi_step__threshold__below_mean__sum": {
                "question": [
                    f"What is the sum of {y_axis_title} for bins that have their {y_axis_title} below the average {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, what is the sum of their {y_axis_title}?",
                    f"What is the sum of {y_axis_title} for bins that have their {y_axis_title} below the mean {y_axis_title} of all bins?",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, what is the sum of their {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these bins" + (f":\n{below_sum_str}" if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "sum of bins below mean",
                "answer": below_sum,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": below_bin_idx if self.force_ground else [],
                    "answer": below_bin_idx,
                },
            },
            "multi_step__threshold__above_mean__mean": {
                "question": [
                    f"What is the mean value of {y_axis_title} for bins that have their {y_axis_title} above the average {y_axis_title} of all bins? Please round to two decimal places.",
                    f"Among bins whose {y_axis_title} are above the average {y_axis_title} of all bins, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {y_axis_title} for bins that have their {y_axis_title} above the mean {y_axis_title} of all bins? Please round to two decimal places.",
                    f"Among bins whose {y_axis_title} are above the mean {y_axis_title} of all bins, what is their average {y_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is higher than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the average {y_axis_title} of these bins" + (f":\n{above_sum_str}/{above_num} = {above_sum}/{above_num} = {above_mean}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "mean of bins above mean",
                "answer": above_mean,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": above_bin_idx,
                    "step_4": above_bin_idx if self.force_ground else [],
                    "answer": above_bin_idx,
                },
            },
            "multi_step__threshold__below_mean__mean": {
                "question": [
                    f"What is the mean value of {y_axis_title} for bins that have their {y_axis_title} below the average {y_axis_title} of all bins? Please round to two decimal places.",
                    f"Among bins whose {y_axis_title} are below the average {y_axis_title} of all bins, what is their mean {y_axis_title}? Please round to two decimal places.",
                    f"What is the average value of {y_axis_title} for bins that have their {y_axis_title} below the mean {y_axis_title} of all bins? Please round to two decimal places.",
                    f"Among bins whose {y_axis_title} are below the mean {y_axis_title} of all bins, what is their average {y_axis_title}? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find all the bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the average {y_axis_title} of these bins" + (f":\n{below_sum_str}/{below_num} = {below_sum}/{below_num} = {below_mean}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "mean of bins below mean",
                "answer": below_mean,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": below_bin_idx,
                    "step_4": below_bin_idx if self.force_ground else [],
                    "answer": below_bin_idx,
                },
            },
            "multi_step__threshold__mean__sum_diff": {
                "question": [
                    f"What is the absolute difference between the total {y_axis_title} of bins above the average {y_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {y_axis_title} for bins above the average and those below the average {y_axis_title}?",
                    f"What is the absolute difference between the total {y_axis_title} of bins above the mean {y_axis_title} and those below it?",
                    f"What is the absolute value of the difference between the total {y_axis_title} for bins above the mean {y_axis_title} and those below the mean {y_axis_title}?",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of bins whose {y_axis_title} is higher than {value_mean} and the second group of bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the total {y_axis_title} of these two groups of bins respectively" + (f":\n* Above average sum: {above_sum_str}\n* Below average sum: {below_sum_str}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I can get the absolute difference between these two groups of bins" + (f":\n|{above_sum}-{below_sum}| = {sum_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "sum difference between bins above and below mean",
                "answer": sum_diff,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": bin_list,
                    "step_4": bin_list if self.force_ground else [],
                    "step_5": bin_list if self.force_ground else [],
                    "answer": bin_list,
                },
            },
            "multi_step__threshold__mean__mean_diff": {
                "question": [
                    f"What is the absolute difference between the average {y_axis_title} of bins above and below the overall average? Please round to two decimal places.",
                    f"What is the absolute difference between the mean {y_axis_title} of bins above the overall average and those below it? Please round to two decimal places.",
                    f"What is the absolute difference between the average {y_axis_title} of bins whose values are above the overall average and those whose values are below it? Please round to two decimal places.",
                ],
                "reasoning": [
                    {
                        "step_1": f"First, I need to read the {y_axis_title} of each bin in this histogram" + (f":\n{read_reason}" if self.detailed_reasoning else "."),
                        "step_2": f"Second, I need to calculate the average {y_axis_title} among all bins" + (f":\n{value_sum}/{len(bin_list)} = {value_mean}." if self.detailed_reasoning else "."),
                        "step_3": f"Third, I need to find the first group of bins whose {y_axis_title} is higher than {value_mean} and the second group of bins whose {y_axis_title} is lower than {value_mean}.",
                        "step_4": f"Fourth, I need to calculate the mean {y_axis_title} for these two groups of bins respectively" + (f":\n* Above average mean: {above_mean_str}\n* Below average mean: {below_mean_str}" if self.detailed_reasoning else "."),
                        "step_5": f"Fifth, I can get the absolute difference between these two groups of bins" + (f":\n|{above_mean}-{below_mean}| = {mean_diff}." if self.detailed_reasoning else "."),
                    },
                ],
                "constraint": "mean difference between bins above and below mean",
                "answer": mean_diff,
                "mask": {
                    "step_1": bin_list,
                    "step_2": bin_list if self.force_ground else [],
                    "step_3": bin_list,
                    "step_4": bin_list if self.force_ground else [],
                    "step_5": bin_list if self.force_ground else [],
                    "answer": bin_list,
                },
            },
        }

        return hard_qa_pool
    