// Package diff provides utilities for parsing and translating unified diff patches
// into str_replace_editor commands for SWE agent training data.
package diff

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
)

// Hunk represents a single hunk in a unified diff: the line ranges announced
// by its "@@ -OldStart,OldCount +NewStart,NewCount @@" header plus the body
// lines that follow it.
type Hunk struct {
	OldStart int    // Starting line number in original file
	OldCount int    // Number of lines in original file
	NewStart int    // Starting line number in new file
	NewCount int    // Number of lines in new file
	Header   string // Text following the closing @@ of the header line (may contain function context)

	// Body lines of the hunk, in patch order. The +, -, or space prefix is not
	// kept in the content; it is recorded as DiffLine.Type instead.
	Lines []DiffLine
}

// DiffLine represents a single body line of a diff hunk.
type DiffLine struct {
	Type    LineType // Added, Removed, or Context
	Content string   // Line content without the +, -, or space prefix
}

// LineType indicates whether a line was added, removed, or is context.
type LineType int

// Possible LineType values. LineContext is the zero value, so a DiffLine
// created without an explicit Type is a context line.
const (
	LineContext LineType = iota // Unchanged line (space prefix)
	LineAdded                   // Added line (+ prefix)
	LineRemoved                 // Removed line (- prefix)
)

// FileDiff represents the diff for a single file: its old and new paths, the
// parsed hunks, and flags describing what kind of file operation it is.
type FileDiff struct {
	OldPath       string // Original file path (or /dev/null for new files)
	NewPath       string // New file path (or /dev/null for deleted files)
	Hunks         []Hunk // List of hunks in the diff
	IsBinary      bool   // True if this is a binary file (ParsePatch does not currently set this)
	IsNewFile     bool   // True if this is a new file (old path is /dev/null, or inferred from the hunks)
	IsDeletedFile bool   // True if this is a deleted file (new path is /dev/null, or inferred from the hunks)
	IsRename      bool   // True if this is a rename (old path != new path, neither is /dev/null)
}

// hunkHeaderRE matches a unified diff hunk header such as
// "@@ -12,7 +12,9 @@ func foo()". Submatches: 1 = old start, 2 = old count
// (optional), 3 = new start, 4 = new count (optional), 5 = text after the
// closing "@@". It is compiled once instead of on every ParsePatch call.
var hunkHeaderRE = regexp.MustCompile(`^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@(.*)$`)

// ParsePatch parses a unified diff patch for a single file into structured data.
//
// It extracts the old/new paths from "--- " and "+++ " header lines (when
// present), every hunk with its body lines, and then derives:
//   - IsNewFile: old path is /dev/null, or inferred from a lone all-added hunk
//   - IsDeletedFile: new path is /dev/null, or inferred from a lone all-removed hunk
//   - IsRename: both paths are set, differ, and neither side is /dev/null
//
// Binary patches are not detected here; IsBinary is always left false.
//
// An empty patch yields an empty FileDiff. Lines before the first hunk header
// that are not path headers are ignored. Body lines without a recognised
// prefix are kept as context lines, and "\ No newline at end of file" markers
// are dropped.
//
// The error is non-nil only when a number in a hunk header does not fit in an
// int; in that case the returned FileDiff is nil.
func ParsePatch(patch string) (*FileDiff, error) {
	diff := &FileDiff{}
	if patch == "" {
		// Nothing to parse: no paths, no hunks, no flags.
		return diff, nil
	}

	var (
		cur     *Hunk // hunk being filled; nil until the first hunk header
		oldLeft int   // old-side lines the current header still announces
		newLeft int   // new-side lines the current header still announces
	)

	for _, line := range strings.Split(patch, "\n") {
		// "--- x" is a file header only when the current hunk is not expecting
		// more old-side lines; otherwise it is a removed line whose content
		// starts with "-- " (e.g. a SQL comment) and must stay in the hunk.
		if oldLeft <= 0 && strings.HasPrefix(line, "--- ") {
			diff.OldPath = cleanPath(strings.TrimPrefix(line, "--- "))
			continue
		}

		// Same reasoning for "+++ x" versus an added line starting with "++ ".
		if newLeft <= 0 && strings.HasPrefix(line, "+++ ") {
			diff.NewPath = cleanPath(strings.TrimPrefix(line, "+++ "))
			continue
		}

		if m := hunkHeaderRE.FindStringSubmatch(line); m != nil {
			next, err := parseHunkHeader(m)
			if err != nil {
				return nil, fmt.Errorf("invalid hunk header %q: %w", line, err)
			}
			if cur != nil {
				diff.Hunks = append(diff.Hunks, *cur)
			}
			cur = next
			oldLeft, newLeft = cur.OldCount, cur.NewCount
			continue
		}

		// Skip everything that precedes the first hunk.
		if cur == nil {
			continue
		}

		if line == "" {
			// Once the announced line counts are satisfied, an empty line is
			// the artefact of the patch's trailing newline (or a separator),
			// not an empty context line. Keeping it would add a phantom
			// context line and defeat the new-file/deletion detection below.
			if oldLeft <= 0 && newLeft <= 0 {
				continue
			}
			// Empty context line whose single-space prefix was stripped.
			cur.Lines = append(cur.Lines, DiffLine{Type: LineContext, Content: ""})
			oldLeft--
			newLeft--
			continue
		}

		switch line[0] {
		case '+':
			cur.Lines = append(cur.Lines, DiffLine{Type: LineAdded, Content: line[1:]})
			newLeft--
		case '-':
			cur.Lines = append(cur.Lines, DiffLine{Type: LineRemoved, Content: line[1:]})
			oldLeft--
		case ' ':
			cur.Lines = append(cur.Lines, DiffLine{Type: LineContext, Content: line[1:]})
			oldLeft--
			newLeft--
		case '\\':
			// "\ No newline at end of file" - not part of either file's content.
		default:
			// Treat as context line (some diffs don't have space prefix for context).
			cur.Lines = append(cur.Lines, DiffLine{Type: LineContext, Content: line})
			oldLeft--
			newLeft--
		}
	}

	// Don't forget the last hunk.
	if cur != nil {
		diff.Hunks = append(diff.Hunks, *cur)
	}

	// Determine file operation type from path headers.
	diff.IsNewFile = isDevNull(diff.OldPath)
	diff.IsDeletedFile = isDevNull(diff.NewPath)
	diff.IsRename = !diff.IsNewFile && !diff.IsDeletedFile &&
		diff.OldPath != "" && diff.NewPath != "" &&
		diff.OldPath != diff.NewPath

	// Detect file deletion/creation from hunk headers when path headers are
	// missing (GitHub API patches don't include --- /dev/null headers).
	if !diff.IsNewFile && !diff.IsDeletedFile && len(diff.Hunks) > 0 {
		if isFileDeletionFromHunks(diff.Hunks) {
			diff.IsDeletedFile = true
		}
		if isNewFileFromHunks(diff.Hunks) {
			diff.IsNewFile = true
		}
	}

	return diff, nil
}

// parseHunkHeader builds a Hunk with no body lines from the submatches of
// hunkHeaderRE. A count omitted from the header defaults to 1. It fails only
// when a number is too large for an int.
func parseHunkHeader(m []string) (*Hunk, error) {
	// Order: old start, old count, new start, new count. The starts are always
	// captured by the regex; the counts are optional and default to 1.
	nums := [4]int{0, 1, 0, 1}
	for i := range nums {
		if m[i+1] == "" {
			continue
		}
		n, err := strconv.Atoi(m[i+1])
		if err != nil {
			return nil, err
		}
		nums[i] = n
	}

	return &Hunk{
		OldStart: nums[0],
		OldCount: nums[1],
		NewStart: nums[2],
		NewCount: nums[3],
		Header:   m[5], // Function context after @@
		Lines:    []DiffLine{},
	}, nil
}

// cleanPath normalises a path taken from a "--- " or "+++ " diff header.
//
// It trims surrounding whitespace, drops anything from the first tab onwards
// (traditional unified diffs append "\t<timestamp>" to the file name), and
// removes a single leading "a/" or "b/" prefix as written by git. Without the
// timestamp handling, "/dev/null\t1970-01-01 ..." would not be recognised as
// /dev/null and differing timestamps would make every diff look like a rename.
func cleanPath(path string) string {
	path = strings.TrimSpace(path)
	if tab := strings.IndexByte(path, '\t'); tab >= 0 {
		path = strings.TrimSpace(path[:tab])
	}

	// Only one prefix is removed, so "a/b/x" becomes "b/x", not "x".
	if strings.HasPrefix(path, "a/") || strings.HasPrefix(path, "b/") {
		return path[2:]
	}
	return path
}

// isDevNull reports whether path names the null device, which diff headers
// use for the missing side of a created or deleted file. Both the absolute
// form and the form without the leading slash are accepted.
func isDevNull(path string) bool {
	switch path {
	case "/dev/null", "dev/null":
		return true
	default:
		return false
	}
}

// isFileDeletionFromHunks detects file deletion from hunk content when path headers are missing.
// A file deletion is detected when:
// - There's a single hunk with NewCount=0 (or NewStart=0)
// - The hunk starts at the top of the old file (OldStart is 0 or 1)
// - All lines in the hunk are removed lines (no context, no additions)
// Note: a zero-context hunk such as "@@ -5,3 +4,0 @@" also has NewCount=0 and
// only removed lines, but it deletes lines from the middle of a file that
// still exists, so it is NOT a file deletion.
func isFileDeletionFromHunks(hunks []Hunk) bool {
	if len(hunks) != 1 {
		return false
	}

	hunk := hunks[0]
	// Check hunk header indicates deletion: NewCount=0 or NewStart=0
	if hunk.NewCount != 0 && hunk.NewStart != 0 {
		return false
	}
	// Removing an entire file necessarily starts at its first line.
	if hunk.OldStart > 1 {
		return false
	}
	// A hunk without body lines proves nothing.
	if len(hunk.Lines) == 0 {
		return false
	}

	// Verify all lines are removed (no context, no additions)
	for _, line := range hunk.Lines {
		if line.Type != LineRemoved {
			return false
		}
	}

	return true
}

// isNewFileFromHunks infers file creation from the hunks alone, for patches
// that carry no path headers. It reports true only when all of these hold:
// - there is exactly one hunk
// - its header has OldStart=0 and OldCount=0 (nothing existed before);
//   OldStart=1 with OldCount=0 is an insertion at the start of an existing
//   file and does not qualify
// - the hunk has at least one line and every line is an addition
func isNewFileFromHunks(hunks []Hunk) bool {
	if len(hunks) != 1 {
		return false
	}
	only := hunks[0]

	startsFromNothing := only.OldStart == 0 && only.OldCount == 0
	if !startsFromNothing || len(only.Lines) == 0 {
		return false
	}

	// Any context or removed line means there was prior content.
	for i := range only.Lines {
		if only.Lines[i].Type != LineAdded {
			return false
		}
	}
	return true
}

// EditCommand represents a str_replace_editor command. Which fields are
// meaningful depends on Command; FormatEditCommand renders each variant.
type EditCommand struct {
	Command    string // "str_replace", "insert", "create", "delete", "rename"
	Path       string // File path (target path for rename)
	OldPath    string // For rename: original file path
	OldStr     string // For str_replace: content to search for
	NewStr     string // For str_replace/insert: content to replace with
	InsertLine int    // For insert: line number to insert after
	FileText   string // For create: full file content
}

// TranslateHunk converts a single hunk into a "str_replace" EditCommand for
// filePath.
//
// OldStr is the hunk's old-side text (context and removed lines, in patch
// order) and NewStr is its new-side text (context and added lines). Lines are
// joined with "\n" and carry no trailing newline. Addition-only,
// deletion-only, and mixed hunks are all expressed the same way, relying on
// the context lines to anchor the replacement.
//
// The returned error is currently always nil.
//
// NOTE(review): a hunk with neither context nor removed lines (for example a
// new-file hunk) produces an empty OldStr; callers may need to handle such
// hunks separately.
func TranslateHunk(hunk Hunk, filePath string) (*EditCommand, error) {
	// Each side holds at most every line of the hunk.
	oldLines := make([]string, 0, len(hunk.Lines))
	newLines := make([]string, 0, len(hunk.Lines))

	for _, line := range hunk.Lines {
		switch line.Type {
		case LineContext:
			// Context exists on both sides.
			oldLines = append(oldLines, line.Content)
			newLines = append(newLines, line.Content)
		case LineRemoved:
			oldLines = append(oldLines, line.Content)
		case LineAdded:
			newLines = append(newLines, line.Content)
		}
	}

	return &EditCommand{
		Command: "str_replace",
		Path:    filePath,
		OldStr:  strings.Join(oldLines, "\n"),
		NewStr:  strings.Join(newLines, "\n"),
	}, nil
}

// TranslateDiff converts a FileDiff into a list of EditCommands, one per hunk
// and in hunk order, each targeting filePath.
//
// It returns an error when diff is nil, when diff.IsBinary is set, or when
// translating a hunk fails. A diff without hunks yields a nil slice and a nil
// error.
func TranslateDiff(diff *FileDiff, filePath string) ([]*EditCommand, error) {
	if diff == nil {
		// Fail with an error rather than panic on a nil dereference.
		return nil, fmt.Errorf("cannot translate nil diff")
	}
	if diff.IsBinary {
		return nil, fmt.Errorf("cannot translate binary file diff")
	}
	if len(diff.Hunks) == 0 {
		return nil, nil
	}

	commands := make([]*EditCommand, 0, len(diff.Hunks))
	for _, hunk := range diff.Hunks {
		cmd, err := TranslateHunk(hunk, filePath)
		if err != nil {
			return nil, fmt.Errorf("failed to translate hunk: %w", err)
		}
		commands = append(commands, cmd)
	}

	return commands, nil
}

// FormatEditCommand formats an EditCommand as training text.
// The format is designed to be readable without the system prompt context
// while still being directly mappable to str_replace_editor tool calls.
//
// Layout per command:
//   - "str_replace": "Edit: <path>", then fenced "Search:" and "Replace:" blocks
//   - "insert": "Edit: <path>", then "Insert after line <n>:" and a fenced block
//   - "create": "Create: <path>", then a fenced "Content:" block
//   - "delete": "Remove: <path>"
//   - "rename": "Rename: <old path>" followed by "To: <path>"
//
// A nil cmd or an unrecognised Command yields the empty string.
func FormatEditCommand(cmd *EditCommand) string {
	if cmd == nil {
		return ""
	}

	var sb strings.Builder

	// writeFenced emits body between ``` fences, adding a newline after body
	// when it lacks one so the closing fence always starts its own line.
	writeFenced := func(body string) {
		sb.WriteString("```\n")
		sb.WriteString(body)
		if !strings.HasSuffix(body, "\n") {
			sb.WriteString("\n")
		}
		sb.WriteString("```\n")
	}

	switch cmd.Command {
	case "str_replace":
		fmt.Fprintf(&sb, "Edit: %s\n\n", cmd.Path)
		sb.WriteString("Search:\n")
		writeFenced(cmd.OldStr)
		sb.WriteString("\nReplace:\n")
		if cmd.NewStr == "" {
			// Pure deletion: an empty fence with no blank line or placeholder.
			sb.WriteString("```\n```\n")
		} else {
			writeFenced(cmd.NewStr)
		}

	case "insert":
		fmt.Fprintf(&sb, "Edit: %s\n\n", cmd.Path)
		fmt.Fprintf(&sb, "Insert after line %d:\n", cmd.InsertLine)
		writeFenced(cmd.NewStr)

	case "create":
		fmt.Fprintf(&sb, "Create: %s\n\n", cmd.Path)
		sb.WriteString("Content:\n")
		writeFenced(cmd.FileText)

	case "delete":
		fmt.Fprintf(&sb, "Remove: %s\n", cmd.Path)

	case "rename":
		fmt.Fprintf(&sb, "Rename: %s\n\n", cmd.OldPath)
		fmt.Fprintf(&sb, "To: %s\n", cmd.Path)
	}

	return sb.String()
}

// TranslateAndFormat runs the whole pipeline for one file: it parses patch,
// translates every hunk into an edit command for filePath, and renders the
// commands as text separated by a "---" rule. Parse failures, binary diffs,
// and translation failures are returned as errors with an empty string.
func TranslateAndFormat(patch string, filePath string) (string, error) {
	parsed, parseErr := ParsePatch(patch)
	if parseErr != nil {
		return "", fmt.Errorf("failed to parse patch: %w", parseErr)
	}
	if parsed.IsBinary {
		return "", fmt.Errorf("binary file, cannot translate")
	}

	cmds, translateErr := TranslateDiff(parsed, filePath)
	if translateErr != nil {
		return "", translateErr
	}

	// Write the separator between commands only, never before the first or
	// after the last.
	const separator = "\n---\n\n"
	var out strings.Builder
	for i, c := range cmds {
		if i > 0 {
			out.WriteString(separator)
		}
		out.WriteString(FormatEditCommand(c))
	}
	return out.String(), nil
}
