"""
Prompt templates and instructions for rewriting tool thinking datasets.
"""

from typing import List, Dict, Optional, Tuple
import numpy as np
from thinktime.sft.utils.describe_attributes import attribute_prompt


# Function-calling schemas for the two tools the prompts in this module refer
# to: a single-slice getter and a two-slice comparison. Key order is kept as
# authored so any serialized form stays stable.
TOOL_DEFINITION = [
    {
        "type": "function",
        "function": {
            "name": "get_timeseries_slice",
            "description": (
                "Get the current timeseries_slice of one of the time series in a "
                "given location, you should call this tool during thinking to "
                "better recognize the local fluctuations of a given period"
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "metric_name": {
                        "type": "string",
                        "description": "The name of the metric to get the timeseries slice for",
                    },
                    "start": {
                        "type": "integer",
                        "description": "The start index of the timeseries slice",
                    },
                    "end": {
                        "type": "integer",
                        "description": "The end index of the timeseries slice",
                    },
                },
                "required": ["metric_name", "start", "end"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "compare_timeseries_slice",
            "description": (
                "Compare two time series slices for comparative analysis. "
                "Recommended for comparing same series different periods, or "
                "different series same periods. "
                "Use when analyzing periodicity or comparing different time series patterns."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "metric_name_1": {
                        "type": "string",
                        "description": "The name of the first metric to compare",
                    },
                    "start_1": {
                        "type": "integer",
                        "description": "The start index of the first timeseries slice",
                    },
                    "end_1": {
                        "type": "integer",
                        "description": "The end index of the first timeseries slice",
                    },
                    "metric_name_2": {
                        "type": "string",
                        "description": "The name of the second metric to compare",
                    },
                    "start_2": {
                        "type": "integer",
                        "description": "The start index of the second timeseries slice",
                    },
                    "end_2": {
                        "type": "integer",
                        "description": "The end index of the second timeseries slice",
                    },
                },
                "required": [
                    "metric_name_1",
                    "start_1",
                    "end_1",
                    "metric_name_2",
                    "start_2",
                    "end_2",
                ],
            },
        },
    },
]


# Main tool thinking instruction.
# This is the fixed preamble of every rewrite prompt. The tool-call budget is
# stated twice (Tool Usage Guidelines and the Output Format placeholder); both
# places must quote the same range, otherwise the model receives contradictory
# limits within a single prompt.
TOOL_THINKING_INSTRUCTION = """For a given time series question and answer pair, rewrite the answer to include deep thinking that demonstrates tool-based analysis while keeping the original question and final answer unchanged.

Task: Generate only the THINKING and ANSWER sections, preserving the original question and final answer.

Tool Usage Guidelines:
- Use get_timeseries_slice to examine time series segments  
- Use compare_timeseries_slice to compare two time series segments (same series different periods, or different series same periods)
- Include 0-5 tool calls (can be 0) with <tool_start>...<tool_end> tags
- Each call should be unique, no duplicates
- Integrate tool results naturally into thinking process (including tool calls for reflection check)
- **Slice Length Guidelines**: Vary slice lengths strategically - use 16-64 points for detailed analysis, 8-16 for focused inspection, or larger windows up to full series when needed. Adapt window size to the specific analysis purpose.
- **Window Selection Strategy**: Employ diverse approaches - start from different positions (beginning/middle/end), use overlapping or non-contiguous windows, or target specific patterns you observe
- **Tool Call Patterns**: Vary your analytical approach - sometimes start with comparison, other times with single-slice inspection; occasionally use fewer tools for efficiency or more for thoroughness
- (Important) For trend-related questions of Univariate Time Series about the whole time series, DO NOT call any tools. Just think about the whole timeseries without calling any tools
- (Important) For questions about shape correlation (or trend correlation) between two time series, only call one compare_timeseries_slice tool for the related timeseries from start to end

Deep Thinking Requirements:
- Write from first person perspective ("I need to analyze...", "Let me examine...")
- Include detailed analysis of time series characteristics relevant to the question
- **Enhanced Self-Reflection**: Show continuous self-questioning and verification throughout the analysis:
  * Use phrases like "Wait, I need to verify", "But I still need to check...", "Actually, let me reconsider..."
  * Question your initial observations: "Is this pattern really what I think it is?"
  * Double-check your interpretations: "Let me confirm this by looking at another segment"
  * Show uncertainty and then resolve it: "I'm not entirely sure about this... let me examine more closely"
  * Demonstrate iterative thinking: "My initial thought was X, but after seeing this, I think Y"
  * Show course correction: "Actually, I realize I need to look at this differently"
  * **Vary reflection patterns**: Sometimes doubt window choices, other times question interpretations, occasionally reconsider methodology
- Reference tool call results in your reasoning and reflect on what they reveal
- Use appropriate line breaks between analytical steps
- Avoid repetitive phrases or excessive repetition
- Focus on aspects directly relevant to the question (don't overanalyze irrelevant details)
- **Flexible analytical approaches**: Sometimes jump directly to key analysis, other times build up systematically; vary between broad overview and detailed inspection
- Demonstrate methodical reasoning that builds toward the conclusion through self-questioning
- Aim for substantial analysis (500-1000 characters) but stay focused and relevant

Answer Requirements:
- Provide detailed explanations based on your thinking process and tool call results
- Include relevant insights from tool usage
- Connect your analysis to the final conclusion
- Add context and reasoning that wasn't in the original answer

Output Format: Provide your response using markdown sections with these exact headers:

### THINKING ###
[Your focused first-person thinking process - with 0-5 tool calls integrated naturally. Include verification, self-reflection, and methodical analysis relevant to the question. Should be substantial but not excessively long.]

### ANSWER ###
[Your enhanced answer with detailed explanations based on the thinking process and tool call results. Should provide more insight than the original answer.]

Important Guidelines:
- Think from the user's perspective who only sees the timeseries and question
- Use phrases like "from the timeseries, I can see" rather than "from the given context"  
- Do not reference "context", "requirements", or "guidelines" explicitly in your thinking
- Focus thinking on aspects directly relevant to the question
- For trend questions about whole time series: analyze without tools
- For correlation questions: use one compare tool for full series comparison
- Ensure tool calls provide meaningful insights that inform your reasoning
- Balance thoroughness with relevance - substantial analysis without excessive length
"""

# Optional block appended right after the caller-supplied instruction when
# reflection mode is on. It asks the model to notice an unhelpful tool call,
# explain why it was unhelpful, and follow up with a corrected one.
# The leading and trailing empty entries give the text its surrounding newlines.
REFLECTION_ADDON = "\n".join([
    "",
    "(Reflection Mode)",
    "- You may contain some uninformative or incorrect tool call at start or in the middle part of thinking (e.g., a slice with no local fluctuation, a mismatched comparison, wrong window size, or suboptimal analytical approach). If that happens:",
    '  * Explicitly acknowledge the mismatch ("This slice does not show the expected pattern...", "This window is too small/large...", "This approach isn\'t revealing...")',
    "  * Explain why it is unhelpful",
    "  * Make one corrective follow-up with a better-chosen range, different approach, or alternative strategy",
    "  * Vary your reflection focus: sometimes question window selection, other times doubt analytical methodology, occasionally reconsider interpretation",
    "- Keep final conclusions consistent with evidence after correction",
    "",
])


def generate_tool_thinking_prompt(
    question: str,
    answer: str,
    timeseries: np.ndarray,
    description: List[dict],
    metrics: List[str],
    fields: Dict[str, List[int]],
    corr_pool: List[Tuple[List[int], str]],
    instruction: str,
    ability_type: Optional[str],
    prompt_with_tool_reflection: Optional[bool] = None,
    reflection_prob: float = 0.35,
) -> str:
    """Assemble the full rewrite prompt for one question/answer pair.

    The result is ``TOOL_THINKING_INSTRUCTION`` followed by a context block
    made of: the attribute description from ``attribute_prompt``, the
    caller's ``instruction`` (plus ``REFLECTION_ADDON`` when reflection is
    on), the given QA, and the ability-specific guidance and requirements
    from ``get_type_specific_prompt``.

    Args:
        question: Original question text, embedded verbatim.
        answer: Original answer text, embedded verbatim.
        timeseries: Series data; for arrays with more than one dimension the
            length of the first row is used as the series length.
        description, metrics, fields, corr_pool: Forwarded unchanged to
            ``attribute_prompt``; ``metrics`` is also listed in the tool help.
        instruction: Extra instruction text placed under ``INSTRUCTION:``.
        ability_type: One of 'local', 'local-correlation', 'noise', 'trend',
            'local-cluster', 'shape-cluster', 'season', 'shape-correlation',
            'mts_local', 'mts_shape', or None for the general guidance.
        prompt_with_tool_reflection: Force reflection on/off; when None the
            mode is drawn at random.
        reflection_prob: Probability of enabling reflection when it is drawn.

    Returns:
        The complete prompt string.
    """
    qa_block = f"### QUESTION ###\n{question}\n\n### ANSWER ###\n{answer}"

    # Length of a single series: first row for multi-dimensional input.
    if len(timeseries.shape) > 1:
        series_length = len(timeseries[0])
    else:
        series_length = len(timeseries)

    guidance, requirements = get_type_specific_prompt(ability_type, metrics, series_length)

    # Kept ahead of the random draw below so the call order is unchanged.
    attributes = attribute_prompt(timeseries, description, metrics, fields, corr_pool)

    # An explicit flag wins; otherwise flip a biased coin.
    if prompt_with_tool_reflection is not None:
        use_reflection = bool(prompt_with_tool_reflection)
    else:
        use_reflection = bool(np.random.rand() < reflection_prob)
    addon = REFLECTION_ADDON if use_reflection else ""

    # Leading/trailing empty entries reproduce the block's outer newlines.
    context_lines = [
        "",
        "CONTEXT (Only for generate QA, not visible to the user who answer the question):",
        f"{attributes}",
        "",
        "INSTRUCTION:",
        f"{instruction}{addon}",
        "",
        "GIVEN QA:",
        qa_block,
        "",
        f"{guidance}",
        "",
        f"{requirements}",
        "",
    ]
    return TOOL_THINKING_INSTRUCTION + "\n".join(context_lines)


def get_type_specific_prompt(ability_type: Optional[str], metrics: List[str], timeseries_length: int) -> Tuple[str, str]:
    """Return the tool reference plus ability-specific guidance for a rewrite prompt.

    Args:
        ability_type: One of 'local', 'trend', 'season', 'noise',
            'local-correlation', 'shape-correlation', 'local-cluster',
            'shape-cluster', 'mts_local', 'mts_shape'. Any other value
            (including None) selects the general guidance.
        metrics: Metric names listed as the valid choices for the tools'
            metric-name parameters.
        timeseries_length: Series length; quoted in the prompt as the strict
            upper bound for end indices.

    Returns:
        A ``(instruction, constraints)`` pair. ``instruction`` is the shared
        tool reference followed by the task-specific guidance;
        ``constraints`` is the matching "Requirements" block.
    """
    metric_choices = ', '.join(metrics)

    # Shared tool reference. The compare tool's parameter line mirrors the
    # single-slice line: metric names are strings picked from `metrics`, only
    # the start/end indices are integers.
    base_tool_info = f"""
Available Tools:
- get_timeseries_slice: Retrieves a slice of time series data for detailed analysis
  - Parameters: metric_name (choose from {metric_choices}), start (integer, >= 0), end (integer, < {timeseries_length})
  - Returns: Time series values, statistics, and visualization for the specified range
  - **Flexible Slice Length**: Adapt window size to purpose - 8-16 for focused inspection, 16-64 for standard analysis, larger for broad overview

- compare_timeseries_slice: Compares two time series slices for comparative analysis
  - Parameters: metric_name_1, metric_name_2 (choose from {metric_choices}), start_1, start_2 (integers, >= 0), end_1, end_2 (integers, < {timeseries_length})
  - Returns: Values and comparative statistics for both slices
  - **Flexible Slice Length**: Adapt window size to purpose - smaller windows for specific pattern comparison, larger for overall trend comparison

Time series length: {timeseries_length}
**Strategic windowing**: Choose window sizes and positions based on your analytical goals - vary between focused inspection and broad analysis
"""

    # ability_type -> (guidance appended to the tool reference, requirements)
    sections = {
        'local': (
            """
TASK-SPECIFIC GUIDANCE (Local Fluctuation Analysis):
- Focus on identifying and analyzing ALL local fluctuations (spikes, dips, convex/concave patterns)
- Use get_timeseries_slice to examine different segments for detailed local analysis
- **Flexible call count**: Use 2-4 tools, but adapt based on complexity - fewer for simple patterns, more for complex fluctuation distributions
- **Adaptive windowing**: Start from different time points (beginning/middle/end), use varied window sizes (16-64 points), sometimes target specific visible patterns
- Pay attention to amplitude, duration, and timing of fluctuations
- **Varied analytical approaches**: Sometimes scan systematically, other times target suspected fluctuation zones; occasionally start with broad overview then zoom in
- Use phrases like "Let me examine this spike more closely" or "I should check other segments for similar patterns"
""",
            """
Requirements:
- **Flexible tool usage**: Use 2-4 get_timeseries_slice calls, varying window positions and sizes based on observed patterns
- **Adaptive slice strategy**: Choose window sizes (16-64 points) and positions strategically - sometimes systematic scanning, other times targeted investigation
- Include self-reflection like noticing empty/flat slices and then correcting with a better slice
- **Varied reflection patterns**: Sometimes question window choice, other times doubt pattern interpretation, occasionally reconsider analytical approach
- Focus analysis on local characteristics, not overall trends
- Examine multiple time segments to identify all fluctuations
- Do not mention "context" - act as if you only see the timeseries data
""",
        ),
        'trend': (
            """
TASK-SPECIFIC GUIDANCE (Trend Analysis):
- Focus on overall increasing/decreasing/stable patterns across the entire timeseries
- NO TOOL CALLS needed - analyze the overall pattern without detailed segmentation
- Look at the general direction and long-term behavior
- Consider slope, monotonicity, and overall trajectory
""",
            """
Requirements:
- DO NOT use any tool calls for trend analysis
- Use self-reflection on ambiguous global patterns, then resolve it without tools
- Focus on overall direction and long-term patterns
- Use phrases like "Looking at the overall pattern..." or "The general trend shows..."
- Do not mention "context" - act as if you only see the timeseries data
""",
        ),
        'season': (
            """
TASK-SPECIFIC GUIDANCE (Seasonal Pattern Analysis):
- Identify repeating patterns and periodic behaviors
- Use compare_timeseries_slice to compare different periods of the same timeseries
- Look for cyclical patterns, regular intervals, and repeated structures
- **Flexible approach**: Use 1-3 tool calls - sometimes start with suspected period, other times explore different interval hypotheses
- **Varied windowing**: Experiment with different period lengths and alignment strategies based on visual observations
""",
            """
Requirements:
- **Adaptive tool usage**: Use 1-3 compare_timeseries_slice calls, varying period selection strategy
- **Flexible alignment**: Sometimes align periods systematically, other times test hypotheses about cycle length
- Allow initial comparisons to be inconclusive and then correct with better-aligned periods or different intervals
- Look for repeating patterns and cyclical behaviors
- **Varied reflection**: Question period selection, doubt alignment accuracy, or reconsider cycle length hypotheses
- Include self-reflection like "Let me compare this period with an earlier one..."
- Focus on periodicity and seasonal characteristics
- Do not mention "context"
""",
        ),
        'noise': (
            """
TASK-SPECIFIC GUIDANCE (Noise Analysis):
- Focus on random variations, minor fluctuations, and statistical properties
- Avoid analysis of periods with local fluctuations; correct if the chosen slice contains events
- Use get_timeseries_slice to examine different segments for noise characteristics
- **Adaptive windowing**: Use varied window sizes (16-64 points) and positions - sometimes multiple small windows for comparison, other times fewer larger samples
- **Strategic selection**: Target visually calm regions, but occasionally check eventful areas for comparison before correcting
- Analyze variance, standard deviation, and random components
- **Flexible approach**: Use 1-4 tool calls depending on noise complexity and distribution across the series
""",
            """
Requirements:
- **Flexible tool usage**: Use 1-4 get_timeseries_slice calls, adapting to noise distribution pattern
- **Strategic window selection**: Vary sizes and positions, sometimes comparing calm vs eventful regions before focusing on appropriate areas
- If a slice contains strong local events, acknowledge and switch to a calmer slice
- Focus on statistical properties and minor fluctuations
- **Varied reflection approaches**: Sometimes question window choice, other times doubt statistical interpretation
- Do not mention "context"
""",
        ),
        'local-correlation': (
            """
TASK-SPECIFIC GUIDANCE (Local Fluctuation Correlation):
- Analyze correlation of local fluctuations between two timeseries (position-based)
- **Flexible workflow**: Sometimes start with single-series analysis then compare, other times jump directly to comparison, occasionally use multiple comparison windows
- **Adaptive windowing**: Use varied approaches - synchronized windows, offset windows for lag analysis, or different window sizes for different aspects
- Allow initial misaligned windows and then correct to synchronized ones, or vice versa
- **Variable tool count**: Use 2-5 tool calls based on correlation complexity and temporal patterns
""",
            """
Requirements:
- **Flexible approach**: Use 2-5 tool calls with varied strategies - sometimes systematic single-then-compare, other times direct multi-series comparison
- **Adaptive correction**: If initial comparisons show weak alignment, vary the correction approach - adjust timing, window size, or analytical focus
- **Varied reflection**: Question synchronization assumptions, doubt window choices, or reconsider correlation methodology
- Focus on timing-based synchronization and positional correlation
- Do not mention "context"
""",
        ),
        'shape-correlation': (
            """
TASK-SPECIFIC GUIDANCE (Shape/Trend Correlation):
- Analyze overall shape and trend correlation between two timeseries
- Use ONLY ONE compare_timeseries_slice call for the entire timeseries comparison
- Reflect on whether one full-series comparison could hide phase-specific differences, but keep only one call
""",
            """
Requirements:
- Use EXACTLY ONE compare_timeseries_slice call for the full timeseries comparison
- Include reflective doubt but resolve with a clear global conclusion
- Focus on overall shapes and trends
- Do not mention "context"
""",
        ),
        'local-cluster': (
            """
TASK-SPECIFIC GUIDANCE (Local Fluctuation Clustering):
- Cluster timeseries based on similar local fluctuation patterns
- WORKFLOW: Analyze fluctuations in one series first, then compare with others
- Permit one unhelpful comparison and then correct with a better pairing
- Use tools 3-5 times
""",
            """
Requirements:
- Use 3-5 tool calls: analyze local patterns in one series, then compare with others
- If a pairing is not representative, acknowledge and replace it
- Focus on local fluctuation similarities for clustering
- Do not mention "context"
""",
        ),
        'shape-cluster': (
            """
TASK-SPECIFIC GUIDANCE (Shape-based Clustering):
- Cluster timeseries based on overall shape and trend similarities
- Use compare_timeseries_slice to compare overall shapes between different series
- At most one comparison can be admitted as inconclusive; then correct
- Call tools 2-3 times
""",
            """
Requirements:
- Use 2-3 compare_timeseries_slice calls
- Focus on global shape similarities for clustering
- Allow one inconclusive compare then a corrected one
- Do not mention "context"
""",
        ),
        'mts_local': (
            """
TASK-SPECIFIC GUIDANCE (MTS Local Analysis - Combination of local-correlation and local-cluster):
- Analyze pairwise local correlations and cluster by local patterns
- WORKFLOW: local analysis → pairwise comparisons → clustering insights
- Allow one misaligned window before correction
- Use tools 3-5 times
""",
            """
Requirements:
- Use 3-5 tool calls combining local correlation and clustering
- If a window choice is poor, state and fix it
- Do not mention "context"
""",
        ),
        'mts_shape': (
            """
TASK-SPECIFIC GUIDANCE (MTS Shape Analysis - Combination of shape-correlation and shape-cluster):
- Analyze pairwise shape correlations and cluster by overall shapes
- Use tools 2-4 times
- Allow one broad comparison to be noted as insufficient, then add another pairwise compare
""",
            """
Requirements:
- Use 2-4 compare_timeseries_slice calls
- Include one reflective correction if needed
- Focus on global patterns and trends
- Do not mention "context"
""",
        ),
    }

    # Used for None and for any ability type without a dedicated entry.
    general_sections = (
        """
GENERAL GUIDANCE:
- Use tools when detailed analysis would help answer the question
- **Flexible analytical strategies**: Sometimes start with broad overview, other times dive into specifics; vary between systematic scanning and targeted investigation
- Integrate tool calls naturally into your thinking process
- **Adaptive tool usage**: Choose 0-5 tools based on complexity - simple patterns may need fewer calls, complex phenomena may require more thorough investigation
- **Varied reflection patterns**: Sometimes doubt tool choice, other times question interpretations, occasionally reconsider methodology or window selection
- It is acceptable to make one unhelpful tool call and then correct it
""",
        """
Requirements:
- **Adaptive tool strategy**: Use 0-5 tool calls based on question complexity and analytical needs
- **Varied approaches**: Sometimes systematic analysis, other times intuition-driven investigation; mix different analytical perspectives
- Include self-reflection and one possible correction, but vary the type of reflection (methodology, window choice, interpretation, etc.)
- Keep analysis focused and relevant
- Do not mention "context"
""",
    )

    try:
        guidance, requirements = sections[ability_type]
    except (KeyError, TypeError):
        # KeyError: unknown/None type. TypeError: unhashable value, which the
        # previous equality-based chain also treated as "no match".
        guidance, requirements = general_sections

    return base_tool_info + guidance, requirements


def create_multi_tool_thinking_prompt(data_item: Dict) -> str:
    """Create the full rewrite prompt (instructions + worked example) for one item.

    Reflection mode is decided exactly once and the same decision is used for
    both the instruction text and the appended example. Previously, when the
    flag was unset, the instruction and the example were drawn independently
    and could disagree.

    Args:
        data_item: Mapping that must contain 'question', 'answer',
            'timeseries', 'description', 'metrics', 'fields', 'corr_pool',
            'instruction' and 'ability_type'. The optional key
            'prompt_with_tool_reflection' forces reflection on or off; when
            it is missing or None the mode is drawn at random.

    Returns:
        The prompt from ``generate_tool_thinking_prompt`` followed by the
        example from ``get_type_specific_example``.
    """
    # Same value as generate_tool_thinking_prompt's default `reflection_prob`,
    # so the share of prompts carrying the reflection addon is unchanged.
    default_reflection_prob = 0.35

    reflection_flag = data_item.get("prompt_with_tool_reflection")
    if reflection_flag is None:
        reflection_enabled = bool(np.random.rand() < default_reflection_prob)
    else:
        reflection_enabled = bool(reflection_flag)

    # Pass the resolved decision explicitly so the callee does not draw again.
    base_prompt = generate_tool_thinking_prompt(
        data_item['question'],
        data_item['answer'],
        data_item['timeseries'],
        data_item['description'],
        data_item['metrics'],
        data_item['fields'],
        data_item['corr_pool'],
        data_item['instruction'],
        data_item['ability_type'],
        prompt_with_tool_reflection=reflection_enabled,
    )

    example = get_type_specific_example(data_item['ability_type'], reflection=reflection_enabled)

    return base_prompt + example


def get_type_specific_example(ability_type: Optional[str], reflection: bool = False) -> str:
    """Get type-specific examples based on ability_type with optional reflection mode"""
    
    if ability_type == 'local':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Analysis with Reflection):
### THINKING ###
I need to identify all the local fluctuations in this time series. I will start near the beginning, though I might be off:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 0, "end": 32}}<tool_end>

This slice looks fairly flat with no clear spikes or convex shapes—this is not helpful for fluctuation analysis. I should adjust to a region with more variability.

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 48, "end": 80}}<tool_end>

Here I see a sharp rise around the midpoints and a small dip afterward. Wait, I should verify if similar local events repeat later.

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 96, "end": 128}}<tool_end>

Now I observe another transient spike followed by a quick return. My initial window was uninformative; these corrected slices reveal multiple local events at distinct positions. I can summarize them coherently.

### ANSWER ###
Across corrected segments, the series has multiple local fluctuations: a clear spike in the 48–80 window, a smaller dip after it, and another spike in 96–128. These localized events are brief and distinct, supporting the final conclusion about multiple local changes.

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Analysis):
### THINKING ###
I need to identify all the local fluctuations in this time series. Let me start by examining the beginning:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 0, "end": 40}}<tool_end>

I can see several spikes and dips in this initial segment... But wait, am I being thorough enough? I should examine the middle section to see if there are different patterns:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 50, "end": 90}}<tool_end>

The middle section shows different patterns... Actually, I'm noticing something interesting here. Is this spike at position 65 similar to what I saw earlier? Let me double-check by looking at the final segment to ensure I capture all local behaviors:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 100, "end": 140}}<tool_end>

This final part reveals additional local fluctuations... Wait, I think I need to reconsider my initial assessment. Looking at all three segments together, I'm seeing a pattern that I might have missed initially. Actually, let me think about this more systematically - the fluctuations seem to have different characteristics in each segment.

Taking stock of these segment-wise observations, I now have a coherent view of where and how the local changes occur. I'm ready to summarize the findings clearly.

### ANSWER ###
Based on my detailed analysis of different segments, the time series exhibits multiple local fluctuations including [specific patterns identified through tool calls]...

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'trend':
        return """
# EXAMPLE OUTPUT FORMAT (Trend Analysis):
### THINKING ###
Looking at the overall pattern of this time series, I need to identify the general trend direction. Initially it feels slightly increasing, but is that due to a few peaks? I should focus on start-to-end levels and the average trajectory. From the entire series, the beginning values compared to the ending values indicate a consistent [increasing/decreasing/stable] tendency despite short-term noise.

### ANSWER ###
The time series demonstrates a clear [trend type] trend. The overall direction from early to late points supports this conclusion, with local variations not overturning the global trajectory.

# Your Task: Generate THINKING and ANSWER sections following this pattern (NO TOOL CALLS for trend analysis).
"""
    
    elif ability_type == 'season':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Seasonal Pattern Analysis with Reflection):
### THINKING ###
I need to identify seasonal or periodic patterns. I will compare two periods, though I might choose poorly at first:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 32, "metric_name_2": "cpu_usage", "start_2": 40, "end_2": 72}}<tool_end>

These windows are offset oddly; the shapes do not align well—this is inconclusive. I should compare periods spaced by the suspected cycle length.

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 32, "metric_name_2": "cpu_usage", "start_2": 64, "end_2": 96}}<tool_end>

Now the peaks and troughs line up more consistently, indicating a repeating pattern roughly every ~64 points.

### ANSWER ###
The series exhibits a seasonal pattern with a period near one cycle per ~64 points; aligned period comparisons reveal repeating peaks and troughs after correcting the initial misaligned windows.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Seasonal Pattern Analysis):
### THINKING ###
I need to identify seasonal or periodic patterns in this time series. Let me compare different periods to see if there are repeating structures:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 32, "metric_name_2": "cpu_usage", "start_2": 64, "end_2": 96}}<tool_end>

The comparison shows [seasonal pattern analysis]... Let me verify this potential periodicity by comparing another set of periods:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 32, "end_1": 64, "metric_name_2": "cpu_usage", "start_2": 96, "end_2": 128}}<tool_end>

This confirms [seasonal pattern confirmation]... The repeating structure suggests a seasonal pattern with [period description].

### ANSWER ###
The time series exhibits seasonal patterns with [detailed seasonal analysis based on period comparisons]...

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'noise':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Noise Analysis with Reflection):
### THINKING ###
I need to analyze noise levels. I will sample a segment, but I must avoid windows containing strong events:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 32, "end": 64}}<tool_end>

This segment contains a noticeable spike—bad choice for pure noise. I should pick a calmer region.

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 80, "end": 112}}<tool_end>

Variance looks moderate with small random jitter. To confirm consistency:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 120, "end": 152}}<tool_end>

Noise amplitude appears similar here, supporting a stable noise level.

### ANSWER ###
After excluding eventful windows, the noise shows a consistent moderate variance across calm segments, indicating stable minor fluctuations around the baseline.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Noise Analysis):
### THINKING ###
I need to analyze the noise characteristics and random variations in this time series. Let me examine different segments to assess noise levels:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 0, "end": 50}}<tool_end>

This segment shows certain variance and minor fluctuations... But am I interpreting this correctly? Is what I'm seeing actually noise or could it be small-scale patterns? Let me check another period to compare noise levels:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 60, "end": 110}}<tool_end>

With variance patterns cross-checked across segments, I can now consolidate the noise profile into a clear conclusion.

### ANSWER ###
The time series exhibits noise with [detailed noise analysis including statistical properties identified through multiple segment examinations]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'local-correlation':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Correlation with Reflection):
### THINKING ###
I need to assess synchronization of local events across two series. I will start with a window that might be misaligned:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 20, "end": 52}}<tool_end>

I see a small bump near the end. Now compare with the other series in the same window:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 20, "end_1": 52, "metric_name_2": "memory_usage", "start_2": 20, "end_2": 52}}<tool_end>

Patterns look weakly aligned—likely miswindowed. I should shift to where fluctuations are stronger.

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 64, "end_1": 96, "metric_name_2": "memory_usage", "start_2": 64, "end_2": 96}}<tool_end>

Now peaks align more clearly, indicating synchronized local events.

### ANSWER ###
After correcting an initial misaligned window, the two series show synchronized local fluctuations within 64–96, supporting a positive local correlation in event timing.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Correlation):
### THINKING ###
I need to analyze the correlation of local fluctuations between two time series. But first, let me think about what I'm really looking for here - is it about synchronized peaks and valleys, or temporal lag relationships? I'll start by examining the fluctuation patterns in the first series:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 20, "end": 60}}<tool_end>

The first series shows fluctuations at [specific positions and patterns]... Hmm, but am I being too narrow in my selection? Maybe I should examine a larger window to better understand the fluctuation patterns. Let me compare this with the corresponding period in the second series to check for synchronized fluctuations:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 20, "end_1": 60, "metric_name_2": "memory_usage", "start_2": 20, "end_2": 60}}<tool_end>

The comparison reveals [correlation analysis of local fluctuations]... Wait, I'm starting to see a pattern, but is this truly representative? I might be cherry-picking periods. Let me check another time period to verify this correlation pattern - and be honest about whether I'm confirming a bias:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 80, "end_1": 120, "metric_name_2": "memory_usage", "start_2": 80, "end_2": 120}}<tool_end>

Interesting... this second comparison shows [different/similar patterns]. Am I being consistent in my analysis criteria? Actually, let me reconsider what correlation means here - am I looking at magnitude correlation or temporal correlation? I think I need to be clearer about my methodology. The local fluctuations show [synchronized/unsynchronized] behavior, but I should acknowledge the limitations of this sampling approach.

After confirming these windows, I have a consistent view of how the two series co-move locally. I can now state the correlation judgment succinctly.

### ANSWER ###
The local fluctuation correlation analysis shows [detailed correlation analysis based on position-based comparison]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'shape-correlation':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Shape/Trend Correlation with Reflection):
### THINKING ###
I need to judge global shape correlation. A single full-length comparison should suffice, but I must acknowledge it may hide phase-specific differences:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

The overall trajectories are similar in direction though amplitudes differ. While I cannot check sub-phases with extra calls here, the full-series evidence supports a global similarity.

### ANSWER ###
Using a single full-series comparison, the two series share broadly similar shapes and trend directions, indicating positive global shape correlation despite possible phase-specific variations.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Shape/Trend Correlation):
### THINKING ###
I need to analyze the overall shape and trend correlation between two time series. But let me pause first - what do I mean by "shape" exactly? Am I looking at the general trajectory, the scale-normalized patterns, or the raw value relationships? I think I should focus on the overall structural similarities. Let me compare their complete shapes:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

The comparison of the entire series shows [global shape and trend correlation analysis]. The overall trajectories [correlation description] and the general patterns [similarity/difference analysis]. But wait, am I being too simplistic here? Should I be considering different phases of the time series separately? The shapes might correlate in some periods but not others. 

Actually, looking at this holistically, I think the single comparison gives me a reasonable overview of the structural relationship, though I acknowledge this approach might miss temporal variations in correlation strength.

Given this holistic comparison, I'm ready to deliver a clear correlation assessment.

### ANSWER ###
The shape correlation analysis reveals [detailed global pattern correlation based on complete series comparison]

# Your Task: Generate THINKING and ANSWER sections following this pattern (USE ONLY ONE compare tool call).
"""
    
    elif ability_type == 'local-cluster':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Clustering with Reflection):
### THINKING ###
I will analyze local patterns in a reference series and compare with others. First, inspect a candidate window:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 24, "end": 56}}<tool_end>

Local spikes are modest. Compare with disk_io, but this pairing might be weak:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 24, "end_1": 56, "metric_name_2": "disk_io", "start_2": 24, "end_2": 56}}<tool_end>

Similarity is low; not ideal for clustering. Let me try network_io which shows stronger burstiness:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 48, "end_1": 80, "metric_name_2": "network_io", "start_2": 48, "end_2": 80}}<tool_end>

This pairing aligns better in local bursts, suggesting a cluster with similar eventful behavior.

### ANSWER ###
After discarding a weak pairing, cpu_usage and network_io group together by similar localized bursts, whereas disk_io differs, yielding a two-cluster interpretation for local patterns.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Local Fluctuation Clustering):
### THINKING ###
I need to cluster time series based on their local fluctuation patterns. But first, let me think about what makes a good clustering approach - should I be looking at variance patterns, frequency characteristics, or amplitude distributions? I'll start with examining local patterns in one series:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 30, "end": 70}}<tool_end>

This series shows [local pattern description]... Hmm, but is this time window representative of the overall local behavior? Maybe I should have chosen a different period. Anyway, let me compare with another series to see if they share similar local behaviors:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 30, "end_1": 70, "metric_name_2": "disk_io", "start_2": 30, "end_2": 70}}<tool_end>

The comparison shows [similarity/difference in local patterns]... Wait, am I being consistent in my comparison criteria? I need to be clear about what aspects I'm comparing. Let me check a third series to continue the clustering analysis:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 30, "end_1": 70, "metric_name_2": "network_io", "start_2": 30, "end_2": 70}}<tool_end>

This allows me to group [clustering results based on local pattern similarities].

### ANSWER ###
Based on local fluctuation patterns, the time series can be clustered into [detailed clustering analysis based on local behavior comparisons]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'shape-cluster':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (Shape-based Clustering with Reflection):
### THINKING ###
I will compare global shapes to form clusters. First compare cpu_usage vs memory_usage:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

They share an overall rising pattern. I tried cpu_usage vs disk_io next but found the similarity weak, so I will compare memory_usage vs network_io for grouping:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "memory_usage", "start_1": 0, "end_1": 200, "metric_name_2": "network_io", "start_2": 0, "end_2": 200}}<tool_end>

These two also show compatible trajectories, suggesting one cluster, with disk_io separate.

### ANSWER ###
Clustering by global shapes yields a group with cpu_usage and memory_usage (and possibly network_io), while disk_io forms a distinct cluster due to differing trajectory.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (Shape-based Clustering):
### THINKING ###
I need to cluster time series based on their overall shapes and trends. But what exactly constitutes "shape similarity"? Should I focus on trend direction, curvature patterns, or scale-normalized comparisons? Let me start with comparing the global patterns between different series:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

These two series show [global pattern comparison]... But am I being too broad in my comparison? Maybe I should focus on specific shape characteristics. Now let me compare with another series to see grouping patterns:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "disk_io", "start_2": 0, "end_2": 200}}<tool_end>

This comparison reveals [shape-based similarity analysis].

I think I can give the final clustering insights now.

### ANSWER ###
The shape-based clustering analysis shows [detailed clustering based on global pattern similarities]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'mts_local':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (MTS Local Analysis with Reflection):
### THINKING ###
I will analyze local patterns, then pairwise correlations, then grouping. Start with a possibly quiet window:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 40, "end": 72}}<tool_end>

This is too flat—poor for local analysis. I will shift to a more active region:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 64, "end": 96}}<tool_end>

Now I see clear bursts. Compare with memory_usage:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 64, "end_1": 96, "metric_name_2": "memory_usage", "start_2": 64, "end_2": 96}}<tool_end>

They co-fluctuate. For clustering, compare with disk_io:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 64, "end_1": 96, "metric_name_2": "disk_io", "start_2": 64, "end_2": 96}}<tool_end>

Disk_io differs locally, separating into another group.

### ANSWER ###
After correcting an initial quiet window, cpu_usage and memory_usage align in local bursts and form one cluster; disk_io diverges, suggesting distinct local behavior.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (MTS Local Analysis):
### THINKING ###
I need to analyze both local correlations and clustering patterns in this multi-time series. This is a complex task - should I prioritize correlation analysis or clustering? Let me think about how these two aspects interact. First, let me examine local patterns in one series:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 40, "end": 80}}<tool_end>

This series shows [local pattern analysis]... But am I choosing the right time window for this analysis? The local behavior might vary significantly across different periods. Now let me compare with another series for correlation analysis:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 40, "end_1": 80, "metric_name_2": "memory_usage", "start_2": 40, "end_2": 80}}<tool_end>

The pairwise comparison shows [local correlation analysis]... Interesting, but I'm wondering if this correlation is consistent across the entire series or just this specific window. Let me extend this to another series for clustering insights:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 40, "end_1": 80, "metric_name_2": "disk_io", "start_2": 40, "end_2": 80}}<tool_end>

This allows me to both understand correlations and group series by [combined analysis of correlations and clusters]

### ANSWER ###
The MTS local analysis reveals both correlation patterns and clustering structures based on [detailed analysis combining correlation and clustering perspectives]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    elif ability_type == 'mts_shape':
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (MTS Shape Analysis with Reflection):
### THINKING ###
I will compare global shapes for correlation and clustering. Start with cpu_usage vs memory_usage:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

They align in direction. I initially considered cpu_usage vs disk_io but the similarity was weak, so I check memory_usage vs network_io:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "memory_usage", "start_1": 0, "end_1": 200, "metric_name_2": "network_io", "start_2": 0, "end_2": 200}}<tool_end>

This reveals another compatible pair, informing clusters.

### ANSWER ###
Global-shape analysis suggests cpu_usage with memory_usage (and possibly network_io) cluster together, while disk_io separates due to a different trajectory.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (MTS Shape Analysis):
### THINKING ###
I need to analyze both shape correlations and clustering patterns across multiple time series. This is challenging because I'm looking at global patterns from two different analytical perspectives. Let me start by comparing global shapes:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "memory_usage", "start_2": 0, "end_2": 200}}<tool_end>

The shape correlation between these series shows [global correlation analysis]... But am I interpreting the relationship correctly? Let me compare with another series to understand clustering patterns:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 200, "metric_name_2": "network_io", "start_2": 0, "end_2": 200}}<tool_end>

This reveals [shape-based similarity for clustering]... Hmm, I'm starting to see patterns, but am I being consistent in how I define shape similarity across these comparisons? Let me also compare the second and third series directly:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "memory_usage", "start_1": 0, "end_1": 200, "metric_name_2": "network_io", "start_2": 0, "end_2": 200}}<tool_end>

This completes the shape analysis for both correlation and clustering perspectives [combined analysis]

### ANSWER ###
The MTS shape analysis shows both correlation patterns and clustering structures based on [detailed analysis combining shape correlation and clustering perspectives]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
    
    else:
        if reflection:
            return """
# EXAMPLE OUTPUT FORMAT (General Analysis with Reflection):
### THINKING ###
I will start with an initial segment, but it may be uninformative:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 0, "end": 32}}<tool_end>

The tool call results look too steady to explain the phenomenon. I need a livelier segment.

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 48, "end": 80}}<tool_end>

Now I see stronger movement. To confirm the change, compare with the earlier calm period:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 32, "metric_name_2": "cpu_usage", "start_2": 48, "end_2": 80}}<tool_end>

The later window clearly shows higher variability, supporting the conclusion.

### ANSWER ###
After correcting an initial flat window, analysis shows increased variability in the later segment, which aligns with the final answer about changing behavior.
"""
        else:
            return """
# EXAMPLE OUTPUT FORMAT (General Analysis):
### THINKING ###
I need to systematically analyze this time series. But what should I focus on first - overall trends, local patterns, or specific anomalies? Let me start by examining the initial segment:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 0, "end": 32}}<tool_end>

The initial values show [analysis of results]... But wait, am I drawing conclusions too quickly from just this segment? I should verify this pattern by checking a middle segment:

<tool_start>{"name": "get_timeseries_slice", "arguments": {"metric_name": "cpu_usage", "start": 40, "end": 72}}<tool_end>

Interesting, the middle section reveals [analysis]... Hmm, these two segments seem different. Am I cherry-picking time windows that confirm my hypothesis? Let me now compare these two periods directly to confirm my observation:

<tool_start>{"name": "compare_timeseries_slice", "arguments": {"metric_name_1": "cpu_usage", "start_1": 0, "end_1": 32, "metric_name_2": "cpu_usage", "start_2": 40, "end_2": 72}}<tool_end>

The comparison confirms [comparison analysis]. This provides the complete picture needed for understanding the time series behavior, within the constraints of my sampling approach.

### ANSWER ###
The time series exhibits [detailed explanation based on systematic verification through multiple tool calls]

# Your Task: Generate THINKING and ANSWER sections following this pattern.
"""
