import json

# Marker that closes a model's reasoning section; the answer follows it.
_THINK_DELIMITER = "</think>\n\n"

# Paragraph-level markers that introduce trailing commentary after the answer.
_EXPLANATION_MARKERS = (
    "\n\nExplanation",
    "\n\n**Explanation",
    "\n\n**Final Answer",
    "\n\n**Reasoning",
    "\n\n### Explanation",
    "\n\n---",
    "\n\n**Note",
)


def _extract_answer(output):
    """Return the answer portion of a single raw model output string.

    Everything up to and including the last ``</think>\\n\\n`` delimiter is
    dropped (the whole string is kept when the delimiter is absent), and the
    remainder is cut at the earliest explanation marker it contains.
    """
    # rpartition yields the full string as the tail when the delimiter is
    # missing, so no separate "delimiter not found" branch is needed.
    answer = output.rpartition(_THINK_DELIMITER)[2]

    # Cut at the marker that appears FIRST in the text. Cutting at the first
    # marker in list order instead would leave earlier markers (e.g. a
    # "\n\n---" rule preceding "**Explanation") inside the result.
    marker_positions = [
        pos
        for pos in (answer.find(marker) for marker in _EXPLANATION_MARKERS)
        if pos != -1
    ]
    if marker_positions:
        answer = answer[: min(marker_positions)]
    return answer


def parse_thinking_results(file_path):
    """Load a JSON results file and return the cleaned answer of each item.

    The file is read as UTF-8 JSON and its top-level value is iterated.
    Items without an ``"output"`` key are skipped; for the others, the
    ``"output"`` string has its reasoning prefix and trailing explanation
    removed.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list with one cleaned string per item that has an ``"output"`` key,
        in file order, or ``None`` if reading or processing fails (the error
        is printed to stdout rather than raised).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        return [_extract_answer(item["output"]) for item in data if "output" in item]

    except Exception as e:
        # Deliberately best-effort: any failure (missing file, invalid JSON,
        # unexpected item shape) is reported and signalled with None.
        print(f"Error in {file_path} as: {str(e)}")
        return None


# Prompt template asking for MQM-style error annotation of a translation.
# It carries four named placeholders -- {source_lang}, {source_seg},
# {target_lang}, {target_seg} -- presumably filled via str.format by the
# caller (TODO confirm; no formatting call is visible in this file).
# The string is not raw, so the "\n" sequences inside the instruction
# paragraph are escape sequences that become real newlines.
# NOTE(review): the prompt text spells "Cirtical" (sic). It is runtime text and
# is left untouched here; confirm whether downstream models depend on the
# exact wording before correcting it.
TEMPLATE_THINKING = """{source_lang} source:
```{source_seg}```
{target_lang} translation:
```{target_seg}```

Based on the source segment and machine translation surrounded with triple backticks, identify error types in the translation, classify them and give a score. The categories of errors are: accuracy (addition, mistranslation, omission, untranslated text), fluency (character encoding, grammar, inconsistency, punctuation, register, spelling), style (awkward), terminology (inappropriate for context, inconsistent use), non-translation, other, or no-error.\nEach error is classified as one of three categories: critical, major, and minor. Critical errors inhibit comprehension of the text. Major errors disrupt the flow, but what the text is trying to say is still understandable. Minor errors are technically errors, but do not disrupt the flow or hinder comprehension.\nFor each error span, please deduct corresponding points based on following reference points: -25="Cirtical", -5="Major", -1="Minor", -0.1="Minor/Fluency/Punctuation:0.1", 0="No-error"."""


# Single-message list with role "user" whose content is the template above,
# still unformatted (the placeholders have not been substituted here).
TEMPLATE_MQM_THINKING = [{"role": "user", "content": TEMPLATE_THINKING}]
