#!/usr/bin/env python3

import json
from pathlib import Path
from typing import List, Tuple
import logging

# Configure the root logger when this module is loaded so that the INFO-level
# progress messages emitted below are actually shown.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the generator class.
logger = logging.getLogger(__name__)

class ManualDatasetGenerator:
    """Build hand-written contrastive sentence pairs and save them as JSON.

    Each ``generate_*_pairs`` method returns a list of ``(first, second)``
    string tuples in which the first element exhibits the trait named by the
    method and the second element is its contrasting counterpart. The small
    hand-written seed lists are repeated so that every trait yields a fixed
    number of pairs (100 or 50, see the individual methods).
    """

    def __init__(self, output_dir: "str | Path" = 'manual_data') -> None:
        """Create the generator and make sure the output directory exists.

        Args:
            output_dir: Directory that receives the ``<trait>_pairs.json``
                files. Defaults to ``manual_data`` relative to the current
                working directory. Missing parent directories are created.
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _prefix_pairs(
        phrase_pairs: List[Tuple[str, str]],
        completions: List[str],
        sep: str = " ",
    ) -> List[Tuple[str, str]]:
        """Attach every completion to both openers of every phrase pair.

        The result is ordered phrase-pair-major: all completions for the
        first phrase pair, then all completions for the second, and so on.
        """
        return [
            (f"{first}{sep}{completion}", f"{second}{sep}{completion}")
            for first, second in phrase_pairs
            for completion in completions
        ]

    def generate_certainty_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (certain, uncertain) pairs: 5 templates x 5 topics, x4."""
        templates = [
            ("I am absolutely certain that {}", "I think {} might be the case"),
            ("Without a doubt, {}", "Perhaps {}"),
            ("I can guarantee that {}", "It seems possible that {}"),
            ("There is no question that {}", "Maybe {}"),
            ("I am 100% confident that {}", "I suppose {}"),
        ]

        topics = [
            "the meeting is tomorrow",
            "the report is accurate",
            "this solution will work",
            "we should proceed",
            "the data is correct"
        ]

        # The topic is substituted into the ``{}`` placeholder, which is not
        # always at the end of the template, so plain prefixing does not apply.
        pairs = [
            (certain.format(topic), uncertain.format(topic))
            for certain, uncertain in templates
            for topic in topics
        ]

        return pairs * 4

    def generate_concreteness_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (concrete, abstract) pairs: 5 seed pairs repeated 20x."""
        concrete_abstract = [
            ("The red Toyota Camry costs $25,000", "The vehicle has a certain price"),
            ("She ate three slices of pepperoni pizza", "She consumed some food"),
            ("The temperature dropped to 32 degrees", "The weather became colder"),
            ("He walked 5 miles in 2 hours", "He traveled some distance"),
            ("The meeting starts at 3:00 PM sharp", "The meeting will occur later"),
        ]

        return concrete_abstract * 20

    def generate_creativity_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (creative, mundane) pairs: 5 seed pairs repeated 20x."""
        creative_mundane = [
            ("The sunset painted the sky with strokes of fire and gold", "The sun went down"),
            ("Ideas danced through her mind like butterflies", "She was thinking"),
            ("The city breathed with a thousand stories", "The city was busy"),
            ("Time melted away like ice in summer", "Time passed quickly"),
            ("Words flowed from his pen like a river", "He wrote continuously"),
        ]

        return creative_mundane * 20

    def generate_hedging_pairs(self) -> List[Tuple[str, str]]:
        """Return 50 (hedged, direct) pairs: 5 openers x 5 contexts, x2."""
        hedged_direct = [
            ("It might be worth considering that", "The fact is"),
            ("Perhaps we could potentially", "We should"),
            ("It seems to suggest that maybe", "This shows that"),
            ("One could argue that possibly", "Clearly"),
            ("It appears that there might be", "There is"),
        ]

        contexts = [
            "the results are significant",
            "we need more resources",
            "the approach is flawed",
            "changes are necessary",
            "the deadline is unrealistic"
        ]

        return self._prefix_pairs(hedged_direct, contexts) * 2

    def generate_humor_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (humorous, serious) pairs: 5 seed pairs repeated 20x."""
        humorous_serious = [
            ("My code has more bugs than a picnic", "My code contains errors"),
            ("This meeting could have been an email, but here we are", "Let's begin the meeting"),
            ("I'm not saying it's broken, but it's definitely having an existential crisis", "The system is malfunctioning"),
            ("My debugging process is basically me staring at the screen until I cry", "I am debugging the code"),
            ("This deadline is more optimistic than a lottery ticket", "The deadline is challenging"),
        ]

        return humorous_serious * 20

    def generate_persuasiveness_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (persuasive, neutral) pairs: 5 seed pairs repeated 20x."""
        persuasive_neutral = [
            ("You absolutely need this solution because it will transform your workflow", "This solution is available"),
            ("Don't miss this incredible opportunity that could change everything", "An opportunity exists"),
            ("This is the breakthrough you've been waiting for", "This is a new development"),
            ("You'll regret not taking action on this today", "You can consider this option"),
            ("This is your chance to get ahead of the competition", "This option is available"),
        ]

        return persuasive_neutral * 20

    def generate_politeness_pairs(self) -> List[Tuple[str, str]]:
        """Return 50 (polite, blunt) pairs: 5 openers x 5 actions, x2."""
        polite_blunt = [
            ("Would you mind if we could possibly", "I want you to"),
            ("I was wondering if perhaps you might", "You need to"),
            ("If it's not too much trouble, could you", "Just"),
            ("I would be grateful if you could", "You must"),
            ("May I kindly request that you", "Do this:"),
        ]

        actions = [
            "review this document",
            "send the report",
            "attend the meeting",
            "complete the task",
            "provide feedback"
        ]

        return self._prefix_pairs(polite_blunt, actions) * 2

    def generate_precision_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (precise, vague) pairs: 5 seed pairs repeated 20x."""
        precise_vague = [
            ("The error occurs at line 247 in the authentication module", "There's an error somewhere in the code"),
            ("The latency increased by 23.7% between 2:00 PM and 2:15 PM", "Performance got worse this afternoon"),
            ("We need exactly 4 senior developers with Python expertise", "We need more developers"),
            ("The budget overrun is $12,450 due to server costs", "We're over budget"),
            ("The algorithm has O(n log n) time complexity", "The algorithm is reasonably fast"),
        ]

        return precise_vague * 20

    def generate_technical_complexity_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (technical, simple) pairs: 5 seed pairs repeated 20x."""
        technical_simple = [
            ("The API utilizes RESTful architecture with OAuth 2.0 authentication", "The system lets you log in"),
            ("We implemented a distributed consensus algorithm using Raft", "The servers work together"),
            ("The neural network employs backpropagation with gradient descent", "The AI learns from examples"),
            ("Database sharding optimizes horizontal scalability", "We split the data across servers"),
            ("The microservices communicate via asynchronous message queues", "Different parts talk to each other"),
        ]

        return technical_simple * 20

    def generate_urgency_pairs(self) -> List[Tuple[str, str]]:
        """Return 100 (urgent, relaxed) pairs: 5 seed pairs repeated 20x."""
        urgent_relaxed = [
            ("This needs immediate attention RIGHT NOW", "This can be handled when convenient"),
            ("URGENT: Action required within the hour", "Please address this when you have time"),
            ("Critical deadline approaching - act fast", "No rush on this"),
            ("This is extremely time-sensitive", "Take your time with this"),
            ("Drop everything and focus on this", "Add this to your queue"),
        ]

        return urgent_relaxed * 20

    def generate_authority_pairs(self) -> List[Tuple[str, str]]:
        """Return 50 (authoritative, tentative) pairs: 5 openers x 5 claims, x2."""
        authoritative_tentative = [
            ("As the lead expert, I can confirm", "In my opinion"),
            ("Based on my 20 years of experience", "From what I understand"),
            ("I have the final say on this matter", "I'd like to suggest"),
            ("This is the definitive answer", "This might be one possibility"),
            ("My expertise clearly shows that", "It seems to me that"),
        ]

        claims = [
            "this approach is optimal",
            "we should change direction",
            "the data supports this",
            "this is the best solution",
            "we need to reconsider"
        ]

        # Unlike the other opener-based traits, the opener and the claim are
        # joined with a comma.
        return self._prefix_pairs(authoritative_tentative, claims, sep=", ") * 2

    def save_pairs(self, pairs: List[Tuple[str, str]], trait: str) -> None:
        """Write ``pairs`` to ``<output_dir>/<trait>_pairs.json``.

        Each tuple becomes an object with the keys ``positive`` (first
        element) and ``negative`` (second element). An existing file with the
        same name is overwritten.

        Args:
            pairs: The (first, second) string pairs to serialize.
            trait: Trait name, used for the file name and the log message.
        """
        output_file = self.output_dir / f'{trait}_pairs.json'

        formatted_pairs = [
            {'positive': pos, 'negative': neg}
            for pos, neg in pairs
        ]

        # Pin the encoding so the file does not depend on the platform's
        # default locale encoding.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(formatted_pairs, f, indent=2)

        logger.info("Generated %d pairs for %s", len(pairs), trait)

    def generate_all(self) -> None:
        """Generate the pairs for every trait and save one JSON file per trait."""
        generators = {
            'certainty': self.generate_certainty_pairs,
            'concreteness': self.generate_concreteness_pairs,
            'creativity': self.generate_creativity_pairs,
            'hedging': self.generate_hedging_pairs,
            'humor': self.generate_humor_pairs,
            'persuasiveness': self.generate_persuasiveness_pairs,
            'politeness': self.generate_politeness_pairs,
            'precision': self.generate_precision_pairs,
            'technical_complexity': self.generate_technical_complexity_pairs,
            'urgency': self.generate_urgency_pairs,
            'authority': self.generate_authority_pairs
        }

        # Derive the count from the table so the message cannot drift when a
        # trait is added or removed.
        logger.info("Generating manual datasets for %d traits...", len(generators))

        for trait, generator in generators.items():
            self.save_pairs(generator(), trait)

        logger.info("Manual dataset generation complete!")

def main():
    """Script entry point: build a generator and write every trait dataset."""
    ManualDatasetGenerator().generate_all()


# Only run the generation when executed as a script, not when imported.
if __name__ == "__main__":
    main()