import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional
from tools.basic import FileBasedTool, ToolError
from tools.utils import count_tokens, try_decode_file, detect_file_type, truncate_content_by_tokens, extract_pdf_text

class DownloadCompetitionDataTool(FileBasedTool):
    """Tool to download Kaggle competition data through the ``kaggle`` CLI.

    Plain competition names are fetched with ``kaggle competitions download``.
    Identifiers containing a ``/`` (the ``owner/dataset`` slug form) are still
    fetched with ``kaggle datasets download``, which is the command this tool
    previously issued for every input.
    """

    def __init__(self, work_directory: Optional[str] = None):
        super().__init__(
            name="download_competition_data",
            description="Download Kaggle competition dataset to a specified directory.",
            work_directory=work_directory
        )

    def get_schema(self) -> Dict[str, Any]:
        """Return the JSON-schema style description of this tool's parameters."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": {
                "type": "object",
                "properties": {
                    "competition_name": {
                        "type": "string",
                        "description": "Name of the Kaggle competition."
                    },
                    "target_dir": {
                        "type": "string",
                        "description": "Target directory for download, relative to the work directory.",
                        "default": "data"
                    }
                },
                "required": ["competition_name"]
            }
        }

    def execute(self, competition_name: str, target_dir: str = "data") -> Dict[str, Any]:
        """
        Download Kaggle data into a directory inside the work directory.

        Args:
            competition_name: Competition name (e.g. ``titanic``), or an
                ``owner/dataset`` slug to download a dataset instead.
            target_dir: Target directory for download (must be within work directory)

        Returns:
            Dict with ``success`` and either ``target_dir``/``message`` on
            success or ``error`` on failure. Failures are reported in the
            returned dict rather than raised.
        """
        # Local import keeps the new dependency scoped to this method.
        import zipfile

        try:
            # Validate target directory is within work directory
            target_path = self._validate_path(target_dir, "Download")

            # Create target directory
            target_path.mkdir(parents=True, exist_ok=True)

            # Dataset slugs always have the form "owner/name"; competition
            # names never contain a slash, so the slash decides the sub-command.
            is_dataset_slug = "/" in competition_name
            if is_dataset_slug:
                cmd = ["kaggle", "datasets", "download", "-d", competition_name,
                       "-p", str(target_path), "--unzip"]
            else:
                # `competitions download` has no --unzip flag; the archive is
                # unpacked below instead.
                cmd = ["kaggle", "competitions", "download", "-c", competition_name,
                       "-p", str(target_path)]

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)

            if result.returncode != 0:
                # Prefer stderr, fall back to stdout, then to the bare return code.
                error_msg = result.stderr.strip() or result.stdout.strip()
                if not error_msg:
                    error_msg = f"Command failed with return code {result.returncode}"
                return {
                    "success": False,
                    "error": f"Kaggle download failed: {error_msg}"
                }

            if not is_dataset_slug:
                # Mirror the dataset path's --unzip behaviour: extract the
                # archive and remove it. If the CLI produced no such archive
                # the downloaded files are left untouched.
                archive = target_path / f"{competition_name}.zip"
                if archive.is_file() and zipfile.is_zipfile(str(archive)):
                    with zipfile.ZipFile(str(archive)) as bundle:
                        bundle.extractall(str(target_path))
                    archive.unlink()

            return {
                "success": True,
                "target_dir": str(target_path),
                "message": f"Successfully downloaded {competition_name}"
            }

        except subprocess.TimeoutExpired:
            return {"success": False, "error": "Download timeout (3600s exceeded)"}
        except Exception as e:
            return {"success": False, "error": f"Download error: {str(e)}"}
class ReadFileTool(FileBasedTool):
    """Tool to read a window of lines from a file inside the work directory.

    PDF files are read through ``extract_pdf_text``, files detected as
    ``unsupported_binary`` are only described (never decoded), and everything
    else is decoded with ``try_decode_file``.
    """

    def __init__(self, work_directory: Optional[str] = None):
        super().__init__(
            name="read_file",
            description="Read content from various file formats within the work directory.",
            work_directory=work_directory
        )

    def get_schema(self) -> Dict[str, Any]:
        """Return the JSON-schema style description of this tool's parameters."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file, relative to the work directory."
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "Starting line number (0-indexed).",
                        "default": 0
                    },
                    "num_lines": {
                        "type": "integer",
                        "description": "Number of lines to read.",
                        "default": 5
                    },
                    "token_limit": {
                        "type": "integer",
                        "description": "Maximum number of tokens to return.",
                        "default": 5000
                    }
                },
                "required": ["filepath"]
            }
        }

    @staticmethod
    def _select_lines(text: str, start_line: int, num_lines: int) -> Dict[str, Any]:
        """Pick ``num_lines`` lines of ``text`` starting at ``start_line``.

        Negative ``start_line``/``num_lines`` are clamped to 0 so the slice can
        never wrap around to the end of the file.

        Returns:
            Dict with ``content`` (the joined lines) and ``lines_read`` (a
            human-readable description of the range that was returned).
        """
        start_line = max(start_line, 0)
        num_lines = max(num_lines, 0)

        lines = text.splitlines(keepends=True)
        total_lines = len(lines)
        # Slicing past the end simply yields fewer (or zero) lines.
        selected = lines[start_line:start_line + num_lines]

        if selected:
            described = f"{start_line} to {start_line + len(selected) - 1} (of {total_lines} total)"
        else:
            described = f"0 lines (file has {total_lines} total)"
        return {"content": "".join(selected), "lines_read": described}

    def execute(self, filepath: str, start_line: int = 0, num_lines: int = 5, token_limit: Optional[int] = 5000) -> Dict[str, Any]:
        """Reads a file's content with slicing and token limiting.

        Args:
            filepath: Path to the file, relative to the work directory.
            start_line: 0-indexed first line to return.
            num_lines: Maximum number of lines to return.
            token_limit: Token cap for the returned content; ``None`` or any
                value above 50000 is replaced by 50000.

        Returns:
            Dict with ``success``, ``content``, ``filepath`` and ``file_type``,
            plus ``lines_read``/``encoding_used`` for readable files or
            ``file_size`` for unsupported binary files.

        Raises:
            ToolError: If the path is not a file or reading/extraction fails.
        """
        full_path = self._validate_path(filepath, "read")
        if not full_path.is_file():
            raise ToolError(f"File not found: {filepath}")

        # Set hard token limit to 50k
        HARD_TOKEN_LIMIT = 50000
        if token_limit is None or token_limit > HARD_TOKEN_LIMIT:
            token_limit = HARD_TOKEN_LIMIT

        try:
            file_type = detect_file_type(full_path)

            if file_type == 'unsupported_binary':
                # For unsupported binary files, return info without reading content
                file_size = full_path.stat().st_size
                return {
                    "success": True,
                    "content": f"[Unsupported binary file: {full_path.suffix}]\nFile type is not supported for text reading.\nFile size: {file_size} bytes",
                    "filepath": filepath,
                    "file_type": "unsupported_binary",
                    "file_size": file_size
                }

            if file_type == 'pdf':
                try:
                    text = extract_pdf_text(full_path)
                except Exception as e:
                    raise ToolError(f"Failed to extract text from PDF '{filepath}': {str(e)}") from e
                reported_type = "pdf"
                encoding_used = "pdf_text_extraction"
            else:
                text, encoding_used = try_decode_file(full_path)
                reported_type = "text"

            window = self._select_lines(text, start_line, num_lines)

            return {
                "success": True,
                # Apply token limit (guaranteed to be <= 50k)
                "content": truncate_content_by_tokens(window["content"], token_limit),
                "filepath": filepath,
                "file_type": reported_type,
                "lines_read": window["lines_read"],
                "encoding_used": encoding_used
            }
        except ToolError:
            # Already carries a specific message; do not wrap it a second time.
            raise
        except Exception as e:
            raise ToolError(f"Failed to read file '{filepath}': {str(e)}") from e

class WriteFileTool(FileBasedTool):
    """Tool to write or append text to a file inside the work directory."""

    def __init__(self, work_directory: Optional[str] = None):
        super().__init__(
            name="write_file",
            description="Write or append content to a file in the work directory.",
            work_directory=work_directory
        )

    def get_schema(self) -> Dict[str, Any]:
        """Return the JSON-schema style description of this tool's parameters."""
        return {
            "name": self.name,
            "description": self.description,
            "parameters": {
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Target file path, relative to the work directory."
                    },
                    "content": {
                        "type": "string",
                        "description": "Content to write to the file."
                    },
                    "mode": {
                        "type": "string",
                        "description": "Write mode: 'w' for overwrite, 'a' for append.",
                        "default": "w"
                    },
                    "encoding": {
                        "type": "string",
                        "description": "File encoding to use.",
                        "default": "utf-8"
                    }
                },
                "required": ["file_path", "content"]
            }
        }

    def execute(self, file_path: str, content: str, mode: str = 'w', encoding: str = 'utf-8') -> Dict[str, Any]:
        """Writes content to a specified file.

        The requested encoding is tried first, then ``utf-8``, ``utf-8-sig``
        and ``latin-1``. If none can encode the content, it is written as
        UTF-8 bytes with unencodable characters replaced.

        Args:
            file_path: Target path, relative to the work directory. Missing
                parent directories are created.
            content: Text to write.
            mode: ``'w'`` to overwrite or ``'a'`` to append.
            encoding: Preferred text encoding.

        Returns:
            Dict with ``success``, ``message``, ``encoding_used`` and
            ``bytes_written`` (growth of the file on disk for appends, final
            file size for overwrites).

        Raises:
            ToolError: If ``mode`` is invalid or the write fails.
        """
        if mode not in ['w', 'a']:
            raise ToolError("Invalid mode. Must be 'w' (overwrite) or 'a' (append).")

        full_path = self._validate_path(file_path, "write")
        full_path.parent.mkdir(parents=True, exist_ok=True)

        try:
            # Remember the starting size so appends report only the new bytes.
            size_before = 0
            if mode == 'a' and full_path.exists():
                size_before = full_path.stat().st_size

            # Requested encoding first, then fallbacks; dict.fromkeys drops
            # repeats while keeping order so no encoding is retried.
            candidates = list(dict.fromkeys([encoding, 'utf-8', 'utf-8-sig', 'latin-1']))

            encoding_used = None
            for enc in candidates:
                try:
                    with open(full_path, mode, encoding=enc) as f:
                        f.write(content)
                except (UnicodeEncodeError, UnicodeDecodeError):
                    continue
                encoding_used = enc
                break
            else:
                # Last resort: write as binary with UTF-8 encoding and error replacement
                with open(full_path, mode + 'b') as f:
                    f.write(content.encode('utf-8', errors='replace'))
                encoding_used = 'utf-8_with_replacement'

            # Measure what actually reached the disk instead of assuming UTF-8,
            # so BOMs, fallback encodings and newline translation are counted.
            bytes_written = full_path.stat().st_size - size_before

            action = "Appended to" if mode == 'a' else "Written to"
            return {
                "success": True,
                "message": f"Content successfully {action} file: {file_path}",
                "encoding_used": encoding_used,
                "bytes_written": bytes_written
            }
        except Exception as e:
            raise ToolError(f"Failed to write to file '{file_path}': {str(e)}") from e

class ExecuteBashTool(FileBasedTool):
    """Tool that runs a shell command string from within the work directory.

    The command is handed to ``subprocess.run`` with ``shell=True``; output
    streams are truncated to a fixed token budget before being returned.
    """

    def __init__(self, work_directory: Optional[str] = None):
        super().__init__(
            name="execute_bash",
            description="Execute a bash command safely within the work directory.",
            work_directory=work_directory,
        )

    def get_schema(self) -> Dict[str, Any]:
        """Return the JSON-schema style description of this tool's parameters."""
        properties = {
            "command": {
                "type": "string",
                "description": "The bash command to execute.",
            },
            "timeout": {
                "type": "integer",
                "description": "Command timeout in seconds.",
                "default": 60,
            },
            "cwd": {
                "type": "string",
                "description": "Sub-directory to run command in. Defaults to work directory.",
            },
            "capture_output": {
                "type": "boolean",
                "description": "Whether to capture stdout/stderr.",
                "default": True,
            },
        }
        parameters = {
            "type": "object",
            "properties": properties,
            "required": ["command"],
        }
        return {
            "name": self.name,
            "description": self.description,
            "parameters": parameters,
        }

    def execute(self, command: str, timeout: int = 60, cwd: Optional[str] = None, capture_output: bool = True) -> Dict[str, Any]:
        """Run ``command`` through the shell and report its outcome.

        Args:
            command: Shell command line to run.
            timeout: Seconds to wait before giving up on the command.
            cwd: Optional sub-directory (validated against the work directory)
                to run in; the work directory itself is used when omitted.
            capture_output: When false, stdout/stderr are not collected and
                both are reported as ``None``.

        Returns:
            On completion: ``success`` (true only for return code 0),
            ``return_code``, ``stdout``, ``stderr``, ``command`` and ``cwd``.
            On timeout or any other exception from running the command:
            ``success`` false, ``error`` and ``command``.
        """
        run_dir = self._validate_path(cwd, "execute bash in") if cwd else self.work_directory

        # Set hard token limit to 50k
        HARD_TOKEN_LIMIT = 50000

        def _bounded(stream_text):
            # Streams are only reported when capture was requested, and only
            # shortened when they exceed the token budget.
            if not capture_output:
                return None
            if stream_text and count_tokens(stream_text) > HARD_TOKEN_LIMIT:
                return truncate_content_by_tokens(stream_text, HARD_TOKEN_LIMIT)
            return stream_text

        try:
            completed = subprocess.run(
                command,
                shell=True,
                capture_output=capture_output,
                text=True,
                timeout=timeout,
                cwd=run_dir,
                check=False,
            )
            out_text = _bounded(completed.stdout)
            err_text = _bounded(completed.stderr)
        except subprocess.TimeoutExpired:
            return {"success": False, "error": f"Command timed out after {timeout}s", "command": command}
        except Exception as e:
            return {"success": False, "error": f"Execution error: {str(e)}", "command": command}

        return {
            "success": completed.returncode == 0,
            "return_code": completed.returncode,
            "stdout": out_text,
            "stderr": err_text,
            "command": command,
            "cwd": str(run_dir),
        }

class ListDirectoryStructureTool(FileBasedTool):
    """Tool to render a directory as an indented tree with per-level limits."""

    def __init__(self, work_directory: Optional[str] = None):
        super().__init__(
            name="list_directory_structure",
            description="Show directory structure with limits on files, folders, and depth.",
            work_directory=work_directory
        )

    def get_schema(self) -> Dict[str, Any]:
        """Return the JSON-schema style description of this tool's parameters."""
        # NOTE(review): the schema advertises a token_limit default of 5000,
        # but execute() defaults to None, which resolves to the 50000 hard cap.
        return {
            "name": self.name,
            "description": self.description,
            "parameters": {
                "type": "object",
                "properties": {
                    "directory": {"type": "string", "description": "Target directory path.", "default": "."},
                    "max_folders": {"type": "integer", "description": "Max folders to show per level.", "default": 5},
                    "max_files": {"type": "integer", "description": "Max files to show per level.", "default": 5},
                    "max_depth": {"type": "integer", "description": "Maximum depth to traverse.", "default": 3},
                    "token_limit": {"type": "integer", "description": "Token limit for the output.", "default": 5000}
                },
                "required": []
            }
        }

    def _build_tree(self, path: Path, max_depth: int, max_f: int, max_d: int, prefix: str = '', depth: int = 0) -> List[str]:
        """Return the tree lines for the contents of ``path``.

        Args:
            path: Directory whose entries are listed.
            max_depth: Deepest ``depth`` value that is still expanded; the
                starting directory is depth 0.
            max_f: Maximum number of files shown per directory.
            max_d: Maximum number of sub-directories shown per directory.
            prefix: Indentation/guide string prepended to every line.
            depth: Current recursion depth.

        Returns:
            One string per displayed entry. Directories come first, then
            files, each group sorted case-insensitively by name, with a
            ``...`` line after a group that was cut short.
        """
        if depth > max_depth:
            return []

        # Negative limits would turn the slices below into "all but the last
        # N" and always add a "..." marker; treat them as "show none".
        max_f = max(max_f, 0)
        max_d = max(max_d, 0)

        dirs: List[Path] = []
        files: List[Path] = []
        try:
            # Classify each entry once; anything that is neither a directory
            # nor a regular file is left out of the listing.
            for entry in path.iterdir():
                if entry.is_dir():
                    dirs.append(entry)
                elif entry.is_file():
                    files.append(entry)
        except PermissionError:
            return [f"{prefix}[error: permission denied]"]

        dirs.sort(key=lambda p: p.name.lower())
        files.sort(key=lambda p: p.name.lower())

        # Each row is (label, directory to recurse into or None).
        rows = [(f"{d.name}/", d) for d in dirs[:max_d]]
        if len(dirs) > max_d:
            rows.append(("...", None))
        rows.extend((f.name, None) for f in files[:max_f])
        if len(files) > max_f:
            rows.append(("...", None))

        lines: List[str] = []
        last_index = len(rows) - 1
        for i, (label, subdir) in enumerate(rows):
            is_last = i == last_index
            connector = "└── " if is_last else "├── "
            lines.append(f"{prefix}{connector}{label}")
            if subdir is not None:
                # Keep the vertical guide only while siblings remain below.
                ext = "    " if is_last else "│   "
                lines.extend(self._build_tree(subdir, max_depth, max_f, max_d, prefix + ext, depth + 1))
        return lines

    def execute(self, directory: str = '.', max_folders: int = 5, max_files: int = 5, max_depth: int = 3, token_limit: Optional[int] = None) -> Dict[str, Any]:
        """List the structure of ``directory`` as a text tree.

        Args:
            directory: Directory to list, validated against the work directory.
            max_folders: Max folders to show per level.
            max_files: Max files to show per level.
            max_depth: Maximum depth to traverse.
            token_limit: Token cap for the listing; ``None`` or any value
                above 50000 is replaced by 50000.

        Returns:
            Dict with ``success`` and ``directory_listing``.

        Raises:
            ToolError: If ``directory`` is not an existing directory.
        """
        start_path = self._validate_path(directory, "list directory")
        if not start_path.is_dir():
            raise ToolError(f"Directory not found: {directory}")

        # Set hard token limit to 50k
        HARD_TOKEN_LIMIT = 50000
        if token_limit is None or token_limit > HARD_TOKEN_LIMIT:
            token_limit = HARD_TOKEN_LIMIT

        tree_lines = [f"{start_path.name}/"]
        tree_lines.extend(self._build_tree(start_path, max_depth, max_files, max_folders))
        result_str = "\n".join(tree_lines)

        if count_tokens(result_str) > token_limit:
            result_str = truncate_content_by_tokens(result_str, token_limit)

        return {"success": True, "directory_listing": result_str}
