import os
import time
import json
import csv
from datetime import datetime
from openai import OpenAI
import pytz
from typing import List, Dict, Any
from tqdm import tqdm  

class DeepSeekCoTGenerator:
    def __init__(self, api_key: str = "", base_url: str = "",
                 output_file: str = "xxx.json", temp_file: str = "xxx.json"):
        """Create the API client and reset the token, cost and record counters.

        Args:
            api_key: API key for the OpenAI-compatible endpoint. When empty,
                the ``DEEPSEEK_API_KEY`` environment variable is used, and
                finally the ``"xxx"`` placeholder.
            base_url: Base URL of the endpoint. When empty, the
                ``DEEPSEEK_BASE_URL`` environment variable is used, and
                finally the ``"xxx"`` placeholder.
            output_file: Path stored in ``self.output_file``.
            temp_file: Path stored in ``self.temp_file``.

        Raises:
            Exception: Whatever the client constructor raises; the failure is
                printed first and then re-raised unchanged.
        """
        # Recorded first so __del__ can report the runtime even if the client
        # construction below fails.
        self.start_time = time.time()

        # Precedence: explicit argument > environment variable > placeholder.
        # Keeping credentials out of the source file avoids committing them.
        resolved_key = api_key or os.environ.get("DEEPSEEK_API_KEY", "xxx")
        resolved_url = base_url or os.environ.get("DEEPSEEK_BASE_URL", "xxx")

        try:
            self.client = OpenAI(
                api_key=resolved_key,
                base_url=resolved_url
            )
        except Exception as e:
            print(f"Initialization failed: {str(e)}")
            raise

        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_cost = 0.0
        self.total_saved = 0  # Track total saved records
        # NOTE(review): both defaults are the same placeholder path; confirm
        # the real paths differ, otherwise the two files are one and the same.
        self.output_file = output_file
        self.temp_file = temp_file  # Temporary file for periodic saves

    def _get_beijing_time(self) -> datetime:
        """Return the current time as an aware datetime in Asia/Shanghai.

        Returns:
            A timezone-aware ``datetime`` whose wall-clock fields (hour,
            minute, ...) are expressed in the ``Asia/Shanghai`` zone.
        """
        # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
        # value that then has to be patched with replace(tzinfo=...). Asking
        # for an aware UTC "now" directly gives the same instant.
        utc_now = datetime.now(pytz.utc)
        return utc_now.astimezone(pytz.timezone('Asia/Shanghai'))

    def _calculate_cost(self, prompt_tokens: int, completion_tokens: int) -> float:
        beijing_time = self._get_beijing_time()
        current_minute = beijing_time.hour * 60 + beijing_time.minute

        if 30 <= current_minute < 510:
            input_rate = 1 / 1_000_000
            output_rate = 4 / 1_000_000
        else:
            input_rate = 4 / 1_000_000
            output_rate = 16 / 1_000_000

        return (prompt_tokens * input_rate) + (completion_tokens * output_rate)
    
    def _show_stats(self, usage: dict, cost: float):
        self.total_prompt_tokens += usage.prompt_tokens
        self.total_completion_tokens += usage.completion_tokens
        self.total_cost += cost

        print(f"\n[Current Stats]")
        print(f"Input tokens: {usage.prompt_tokens} | Output tokens: {usage.completion_tokens}")
        print(f"Current cost: ¥{cost:.6f}")

        print(f"\n[Total Stats]")
        print(f"Total input tokens: {self.total_prompt_tokens}")
        print(f"Total output tokens: {self.total_completion_tokens}")
        print(f"Total cost: ¥{self.total_cost:.6f}\n")
    
    def _save_results(self, results: List[Dict[str, Any]], batch_mode: bool = False):
        """Save results to file, with batch mode support"""
        if batch_mode:
            # In batch mode, append to temp file
            with open(self.temp_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
        else:
            # Final save to main output file
            with open(self.output_file, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=2)
    
    def generate_long_cot(self, text: str, label: int, num_samples: int = 5) -> Dict[str, Any]:
        """Generate CoT responses with different reasoning patterns in English"""
        valid_responses = []
        sarcasm_definition = "(1=sarcasm: contains features like surface praise with underlying criticism, contextual incongruity, exaggerated contrast, etc.)"
        for i in range(num_samples):
            try:
                if i < 2:  
                    reasoning_type = "non-linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis reasoning please strictly follow the structured reasoning process. The reasoning framework includes the following optional components:
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found

                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Must analyze the sentiment of this text using non-linear reasoning (e.g., Tree/Graph of Thought).
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sarcasm_definition}

                          Error Checkpoints:
                          ✓ Rhetorical analysis completeness check
                          ✓ Contextual factor weight validation
                          ✓ Counterfactual outcome consistency verification

                          Text content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (1=sarcasm, 0=none)"""
                    )
                elif i == 2:  
                    reasoning_type = "linear"
                    length = "Short"
                    prompt = (
                        f"Analyze the sentiment of this text step by step. Requirements:\n"
                        f"1. Use simple chain of thought without specific reasoning process\n"
                        f"2. Final conclusion must align with: {sarcasm_definition}\n"
                        f"3. Text content: {text}\n"
                        f"4. Conclude with Therefore, the sentiment label is: {label} (1=sarcasm, 0=none)\n"
                    )
                elif i == 3: 
                    reasoning_type = "linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis reasoning please strictly follow the structured reasoning process. The reasoning framework includes the following optional components:
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found

                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Must explore at least TWO different paths using 'alternatively...' comparisons"
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sarcasm_definition}

                          Error Checkpoints:
                          ✓ Rhetorical analysis completeness check
                          ✓ Contextual factor weight validation
                          ✓ Counterfactual outcome consistency verification

                          Text content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (1=sarcasm, 0=none)"""
                    )
                else:  
                    reasoning_type = "linear"
                    length = "Long"
                    prompt = (
                        f"""Perform rigorous sentiment analysis reasoning please strictly follow the structured reasoning process. The reasoning framework includes the following optional components:
                        【Reasoning Framework】
                          Decomposition: Break down text elements (semantics/context/rhetoric)
                          Reflection: Question initial assumptions and verify their rationality
                          Verification: Cross-check logical consistency
                          Transition: Handle contradictory information (using "however" - like analysis)
                          Retry: Correct the reasoning path when errors are found

                          Process Requirements:
                          → Must include ≥ 5 reasoning steps, freely combining the above components, without limitation on the number of times or order, and also free to explore other reasoning methods.
                          → Each step must clearly indicate the type of reasoning used (e.g., 【Step 1 - Decomposition】).
                          → At least two verification stages must be included:
                            - Preliminary conclusion verification
                            - Final decision verification
                          → Contradictions in the text must be addressed (demonstrating the use of "however" - like analysis).
                          → Error correction must show the complete adjustment of the reasoning path.
                          → Final conclusion must align with: {sarcasm_definition}

                          Error Checkpoints:
                          ✓ Rhetorical analysis completeness check
                          ✓ Contextual factor weight validation
                          ✓ Counterfactual outcome consistency verification

                          Text content: "{text}"
                          Conclude with "Therefore, the sentiment label is: {label}" (1=sarcasm, 0=none)"""
                    )
    
                response = self.client.chat.completions.create(
                    model="xxx",
                    messages=[
                        {"role": "system", "content": "You are an expert in complex sentiment analysis with multi-step reasoning."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.7,
                    max_tokens=8192,
                    stream=False
                )
                
                reply = response.choices[0].message.content
                usage = response.usage
                cost = self._calculate_cost(usage.prompt_tokens, usage.completion_tokens)
                self._show_stats(usage, cost)
        
                if f"sentiment label is: {label}" in reply.lower():
                    valid_responses.append({
                        "content": reply,
                        "reasoning_type": reasoning_type,
                        "length": length
                    })
                
            except Exception as e:
                print(f"Error generating CoT: {str(e)}")
                continue
        
        return {
            "text": text,
            "Label": label,
            "CoT_Responses": valid_responses
        }
    
    def load_dataset_from_csv(self, file_path: str) -> List[Dict[str, Any]]:
        """Load dataset from CSV with new column names and handle class mapping"""
        dataset = []
        class_mapping = {"sarc": 1, "notsarc": 0}  
        try:
            with open(file_path, mode='r', encoding='utf-8') as file:
                reader = csv.DictReader(file)
                for row in reader:
                    class_label = class_mapping.get(row["class"].strip().lower(), -1)
                    if class_label == -1:
                        print(f"Warning: Unknown class value '{row['class']}' in row {row['id']}, skipping...")
                        continue

                    dataset.append({
                        "id": row["id"],
                        "class": class_label, 
                        "text": row["text"]
                    })
            print(f"Loaded {len(dataset)} records from {file_path}")
        except Exception as e:
            print(f"Error loading dataset: {str(e)}")
            raise
        return dataset
    
    def process_dataset(self, input_csv: str, start_index: int = 0, save_every: int = 5):
        """Process dataset and save results periodically.

        Every item from ``start_index`` onward is passed to
        ``generate_long_cot``; each accepted response becomes one output
        record. The accumulated records are written in batch mode every
        ``save_every`` items and after the last item, and once more through
        the final (non-batch) save when the loop ends.

        Records from earlier runs are not loaded: the list starts empty, so
        the saved files contain only what this call produced.

        Args:
            input_csv: Path of the CSV dataset to load.
            start_index: Number of leading dataset items to skip (0 processes
                everything).
            save_every: Checkpoint interval, counted in dataset positions.

        Raises:
            ValueError: If ``start_index`` is negative or ``save_every`` is
                smaller than 1.
        """
        if start_index < 0:
            raise ValueError("start_index must be >= 0")
        if save_every < 1:
            raise ValueError("save_every must be >= 1")

        dataset = self.load_dataset_from_csv(input_csv)
        total_items = len(dataset)
        # enumerate()'s start argument only offsets the counter; skipping
        # items needs an explicit slice.
        pending = dataset[start_index:]
        results = []

        # Progress bar sized to the items that will actually be processed.
        with tqdm(total=len(pending), desc="Processing Items", unit="item") as pbar:
            # position is the 1-based index of the item within the full dataset.
            for position, item in enumerate(pending, start_index + 1):
                print(f"\nProcessing item {item['id']} ({position}/{total_items})...")
                result = self.generate_long_cot(item["text"], item["class"])

                for cot in result["CoT_Responses"]:
                    record = {
                        "text": result["text"],
                        "CoT": cot["content"],
                        "Label": result["Label"],
                        "ReasoningType": cot["reasoning_type"],
                        "Length": cot["length"]
                    }
                    results.append(record)
                    self.total_saved += 1

                    # Print current and total saved counts
                    print(f"\nSaved record {len(results)} (Total saved: {self.total_saved})")
                    print(json.dumps(record, indent=2, ensure_ascii=False))

                pbar.update(1)

                # Checkpoint every save_every positions and on the last item.
                if position % save_every == 0 or position == total_items:
                    self._save_results(results, batch_mode=True)
                    print(f"\n=== Progress Update ===")
                    print(f"Saved batch of {len(results)} records (Total saved: {self.total_saved})")
                    print(f"Processed {position}/{total_items} items ({position/total_items*100:.1f}%)")
                    print(f"Temporary results saved to {self.temp_file}")

        # Final save to the main output file
        self._save_results(results)
        print(f"\nProcessing completed. Results saved to {self.output_file}")
        print(f"Total valid responses: {self.total_saved}")
    
    def __del__(self):
        if hasattr(self, 'start_time'):
            total_time = time.time() - self.start_time
            hours, remainder = divmod(total_time, 3600)
            minutes, seconds = divmod(remainder, 60)
            print(f"\nTotal runtime: {int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}")

if __name__ == "__main__":
    # Script entry point: build the generator and run it over the input CSV.
    try:
        cot_generator = DeepSeekCoTGenerator()
        input_csv = "xxx.csv"  # Update this path as needed
        cot_generator.process_dataset(input_csv)
    except Exception as e:
        print(f"System error: {str(e)}")
        # Exit with a non-zero status so shells and schedulers can tell the
        # run failed; previously the script reported success after an error.
        raise SystemExit(1)