package models

// PublicRepo is the stored record for a filtered GitHub repository.
//
// Every field has a parquet tag naming its column. The JSON tags bind RepoID
// to the key "id" and the remaining tagged fields to their snake_case names.
// CreatedAt and LicenseKey are tagged json:"-", so encoding/json neither reads
// nor writes them and they have to be populated by other code.
//
// CreatedAt is an int64 annotated TIMESTAMP_MILLIS (presumably milliseconds
// since the Unix epoch — confirm against the code that fills it in).
type PublicRepo struct {
	RepoID          int64  `parquet:"name=repo_id, type=INT64" json:"id"`
	FullName        string `parquet:"name=full_name, type=BYTE_ARRAY, convertedtype=UTF8" json:"full_name"`
	Description     string `parquet:"name=description, type=BYTE_ARRAY, convertedtype=UTF8" json:"description"`
	DefaultBranch   string `parquet:"name=default_branch, type=BYTE_ARRAY, convertedtype=UTF8" json:"default_branch"`
	Language        string `parquet:"name=language, type=BYTE_ARRAY, convertedtype=UTF8" json:"language"`
	StargazersCount int64  `parquet:"name=stargazers_count, type=INT64" json:"stargazers_count"`
	CreatedAt       int64  `parquet:"name=created_at, type=INT64, convertedtype=TIMESTAMP_MILLIS" json:"-"`
	Archived        bool   `parquet:"name=archived, type=BOOLEAN" json:"archived"`
	LicenseKey      string `parquet:"name=license_key, type=BYTE_ARRAY, convertedtype=UTF8" json:"-"`
}

// RawRepoIndex is the minimal per-repository record coming from the
// repository iterator: the numeric repository ID (JSON key "id") and the
// repository's full name. Both fields are also mapped to parquet columns.
type RawRepoIndex struct {
	RepoID   int64  `parquet:"name=repo_id, type=INT64" json:"id"`
	FullName string `parquet:"name=full_name, type=BYTE_ARRAY, convertedtype=UTF8" json:"full_name"`
}

// PRMetadata is the parquet record for a pull request together with aggregate
// statistics about the files it changed.
//
// Only PRNumber, Title and Body have JSON names ("number", "title", "body");
// every other field is tagged json:"-" and has to be filled in by the caller.
// MergedAt is an int64 annotated TIMESTAMP_MILLIS (presumably milliseconds
// since the Unix epoch — confirm against the code that fills it in).
//
// Extensions is a LIST of strings. Its element annotation is spelled
// valueconvertedtype=UTF8 so that the string annotation lands on the list
// elements rather than on the LIST field itself.
// NOTE(review): this relies on the xitongsys/parquet-go tag grammar, where
// convertedtype describes the field and valueconvertedtype describes its
// elements — confirm that this is the writer in use.
type PRMetadata struct {
	RepoID         int64    `parquet:"name=repo_id, type=INT64" json:"-"`
	RepoName       string   `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8" json:"-"`
	PRNumber       int64    `parquet:"name=pr_number, type=INT64" json:"number"`
	Title          string   `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8" json:"title"`
	Body           string   `parquet:"name=body, type=BYTE_ARRAY, convertedtype=UTF8" json:"body"`
	Author         string   `parquet:"name=author, type=BYTE_ARRAY, convertedtype=UTF8" json:"-"`
	AuthorType     string   `parquet:"name=author_type, type=BYTE_ARRAY, convertedtype=UTF8" json:"-"`
	MergedAt       int64    `parquet:"name=merged_at, type=INT64, convertedtype=TIMESTAMP_MILLIS" json:"-"`
	TotalFiles     int32    `parquet:"name=total_files_changed, type=INT32" json:"-"`
	PyFilesCount   int32    `parquet:"name=py_files_count, type=INT32" json:"-"`
	Extensions     []string `parquet:"name=extensions_list, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8" json:"-"`
	TotalAdditions int32    `parquet:"name=total_additions, type=INT32" json:"-"`
	TotalDeletions int32    `parquet:"name=total_deletions, type=INT32" json:"-"`
	TotalChanges   int32    `parquet:"name=total_changes, type=INT32" json:"-"`
}

// PRListItem is the JSON shape of one pull request as returned by the PR list
// endpoint.
//
// MergedAt is kept as the raw string from the payload; any parsing into a
// timestamp is left to the caller. User is an inline object carrying the
// author's login and account type.
type PRListItem struct {
	Number   int64  `json:"number"`
	Title    string `json:"title"`
	Body     string `json:"body"`
	MergedAt string `json:"merged_at"`
	State    string `json:"state"`
	User     struct {
		Login string `json:"login"`
		Type  string `json:"type"`
	} `json:"user"`
}

// PRFile is the JSON shape of one file changed in a pull request.
//
// PreviousFilename is tagged omitempty, so it is left out of encoded JSON
// when empty. Additions, Deletions and Changes are the per-file counters
// taken from the payload keys of the same names.
type PRFile struct {
	Filename         string `json:"filename"`
	Status           string `json:"status"`
	PreviousFilename string `json:"previous_filename,omitempty"`
	Additions        int    `json:"additions"`
	Deletions        int    `json:"deletions"`
	Changes          int    `json:"changes"`
}

// Task 3: PR Enrichment Models

// IssueComment is a single comment on a GitHub issue as stored in parquet.
//
// CreatedAt is tagged json:"-" and is an int64 annotated TIMESTAMP_MILLIS
// (presumably milliseconds since the Unix epoch), so it has to be set by the
// caller rather than by JSON decoding.
//
// NOTE(review): Author is a string bound to the JSON key "user", whereas
// CommentResponse in this file models "user" as an object with a "login"
// field. Decoding a payload of that shape directly into IssueComment would
// make encoding/json reject the object for a string field — confirm that this
// type is never decoded from raw API JSON.
type IssueComment struct {
	Author    string `parquet:"name=author, type=BYTE_ARRAY, convertedtype=UTF8" json:"user"`
	CreatedAt int64  `parquet:"name=created_at, type=INT64, convertedtype=TIMESTAMP_MILLIS" json:"-"`
	Body      string `parquet:"name=body, type=BYTE_ARRAY, convertedtype=UTF8" json:"body"`
}

// RelatedIssue is an issue related to a pull request, stored together with
// its comments.
//
// IssueID is bound to the JSON key "number"; Title and Body use their own
// names. RepoID, Author, CreatedAt and Comments are tagged json:"-" and have
// to be filled in by the caller. CreatedAt is an int64 annotated
// TIMESTAMP_MILLIS (presumably milliseconds since the Unix epoch). Comments
// is written as a parquet LIST of IssueComment records.
type RelatedIssue struct {
	IssueID   int64          `parquet:"name=issue_id, type=INT64" json:"number"`
	RepoID    int64          `parquet:"name=repo_id, type=INT64" json:"-"`
	Title     string         `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8" json:"title"`
	Body      string         `parquet:"name=body, type=BYTE_ARRAY, convertedtype=UTF8" json:"body"`
	Author    string         `parquet:"name=author, type=BYTE_ARRAY, convertedtype=UTF8" json:"-"`
	CreatedAt int64          `parquet:"name=created_at, type=INT64, convertedtype=TIMESTAMP_MILLIS" json:"-"`
	Comments  []IssueComment `parquet:"name=comments, type=LIST" json:"-"`
}

// FileContent pairs a file path with that file's content at a specific SHA.
// Both fields are stored as UTF-8 annotated parquet strings; the SHA itself
// is not part of this record.
type FileContent struct {
	Path    string `parquet:"name=path, type=BYTE_ARRAY, convertedtype=UTF8"`
	Content string `parquet:"name=content, type=BYTE_ARRAY, convertedtype=UTF8"`
}

// DiffPatch pairs a file path with the diff patch text for that file in a
// commit. Both fields are stored as UTF-8 annotated parquet strings.
type DiffPatch struct {
	Path  string `parquet:"name=path, type=BYTE_ARRAY, convertedtype=UTF8"`
	Patch string `parquet:"name=patch, type=BYTE_ARRAY, convertedtype=UTF8"`
}

// CommitInfo is one commit of a pull request together with its filtered
// per-file diffs.
//
// Timestamp is an int64 annotated TIMESTAMP_MILLIS (presumably milliseconds
// since the Unix epoch — confirm against the code that fills it in). Diffs is
// written as a parquet LIST of DiffPatch records.
//
// Parents is a LIST of strings. Its element annotation is spelled
// valueconvertedtype=UTF8 so that the string annotation lands on the list
// elements rather than on the LIST field itself.
// NOTE(review): this relies on the xitongsys/parquet-go tag grammar, where
// convertedtype describes the field and valueconvertedtype describes its
// elements — confirm that this is the writer in use.
type CommitInfo struct {
	SHA       string      `parquet:"name=sha, type=BYTE_ARRAY, convertedtype=UTF8"`
	Parents   []string    `parquet:"name=parents, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`
	Author    string      `parquet:"name=author, type=BYTE_ARRAY, convertedtype=UTF8"`
	Timestamp int64       `parquet:"name=timestamp, type=INT64, convertedtype=TIMESTAMP_MILLIS"`
	Diffs     []DiffPatch `parquet:"name=diffs, type=LIST"`
	Message   string      `parquet:"name=message, type=BYTE_ARRAY, convertedtype=UTF8"`
}

// EnhancedCommitInfo is a commit that carries both its original message and
// an LLM-refined message.
//
// Compared with CommitInfo it has no Parents field and adds RefinedMessage.
// Timestamp is an int64 annotated TIMESTAMP_MILLIS (presumably milliseconds
// since the Unix epoch). Diffs is written as a parquet LIST of DiffPatch
// records.
type EnhancedCommitInfo struct {
	SHA            string      `parquet:"name=sha, type=BYTE_ARRAY, convertedtype=UTF8"`
	Author         string      `parquet:"name=author, type=BYTE_ARRAY, convertedtype=UTF8"`
	Timestamp      int64       `parquet:"name=timestamp, type=INT64, convertedtype=TIMESTAMP_MILLIS"`
	Diffs          []DiffPatch `parquet:"name=diffs, type=LIST"`
	Message        string      `parquet:"name=message, type=BYTE_ARRAY, convertedtype=UTF8"`
	RefinedMessage string      `parquet:"name=refined_message, type=BYTE_ARRAY, convertedtype=UTF8"`
}

// EnrichedPRData is the complete enriched parquet record for a single pull
// request: identification, the optional related issue, the changed Python
// files, file contents, commit history and the file tree.
//
// The two string lists (ChangedPyFiles and FileTree) spell their element
// annotation as valueconvertedtype=UTF8 so that the string annotation lands
// on the list elements rather than on the LIST field itself.
// NOTE(review): this relies on the xitongsys/parquet-go tag grammar, where
// convertedtype describes the field and valueconvertedtype describes its
// elements — confirm that this is the writer in use.
type EnrichedPRData struct {
	// PR Identification
	PRID     int64  `parquet:"name=pr_id, type=INT64"`
	RepoID   int64  `parquet:"name=repo_id, type=INT64"`
	RepoName string `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8"`
	RepoDesc string `parquet:"name=repo_description, type=BYTE_ARRAY, convertedtype=UTF8"`
	Title    string `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8"`
	Body     string `parquet:"name=body, type=BYTE_ARRAY, convertedtype=UTF8"`

	// SHA Information - parent of the first commit in the PR
	FirstCommitParentSHA string `parquet:"name=first_commit_parent_sha, type=BYTE_ARRAY, convertedtype=UTF8"`

	// Related Issue (optional - may be nil)
	Issue             *RelatedIssue `parquet:"name=related_issue, type=STRUCT"`
	RelatedIssueCount int32         `parquet:"name=related_issue_count, type=INT32"`

	// Changed Files (list elements are UTF-8 strings)
	ChangedPyFiles []string `parquet:"name=changed_py_files, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`

	// File Contents at first commit parent SHA
	RelevantFiles []FileContent `parquet:"name=relevant_files, type=LIST"`

	// Commit History
	Commits []CommitInfo `parquet:"name=commits, type=LIST"`

	// File Tree (Python files at first commit parent SHA; list elements are
	// UTF-8 strings)
	FileTree []string `parquet:"name=file_tree, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`
}

// Task 6: LLM Enhanced PR Data

// LLMEnhancedPRData is the pull request record enhanced with an LLM-generated
// summary and refined commit messages.
//
// Compared with EnrichedPRData it has no FirstCommitParentSHA field, stores
// its commits as EnhancedCommitInfo, and adds PRSummary.
//
// The two string lists (ChangedPyFiles and FileTree) spell their element
// annotation as valueconvertedtype=UTF8 so that the string annotation lands
// on the list elements rather than on the LIST field itself.
// NOTE(review): this relies on the xitongsys/parquet-go tag grammar, where
// convertedtype describes the field and valueconvertedtype describes its
// elements — confirm that this is the writer in use.
type LLMEnhancedPRData struct {
	// PR Identification
	PRID     int64  `parquet:"name=pr_id, type=INT64"`
	RepoID   int64  `parquet:"name=repo_id, type=INT64"`
	RepoName string `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8"`
	RepoDesc string `parquet:"name=repo_description, type=BYTE_ARRAY, convertedtype=UTF8"`
	Title    string `parquet:"name=title, type=BYTE_ARRAY, convertedtype=UTF8"`
	Body     string `parquet:"name=body, type=BYTE_ARRAY, convertedtype=UTF8"`

	// Related Issue (optional - may be nil)
	Issue             *RelatedIssue `parquet:"name=related_issue, type=STRUCT"`
	RelatedIssueCount int32         `parquet:"name=related_issue_count, type=INT32"`

	// Changed Files (list elements are UTF-8 strings)
	ChangedPyFiles []string `parquet:"name=changed_py_files, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`

	// File Contents at first commit parent SHA
	RelevantFiles []FileContent `parquet:"name=relevant_files, type=LIST"`

	// Enhanced Commit History (with refined messages)
	Commits []EnhancedCommitInfo `parquet:"name=commits, type=LIST"`

	// LLM-generated PR Summary
	PRSummary string `parquet:"name=pr_summary, type=BYTE_ARRAY, convertedtype=UTF8"`

	// File Tree (Python files at first commit parent SHA; list elements are
	// UTF-8 strings)
	FileTree []string `parquet:"name=file_tree, type=LIST, valuetype=BYTE_ARRAY, valueconvertedtype=UTF8"`
}

// Task 4: Rendered Text Models

// RenderedPRText is the final rendered text for one pull request, intended
// as LLM training input. It carries the PR and repository identifiers next
// to the rendered Text, all mapped to parquet columns.
type RenderedPRText struct {
	PRID     int64  `parquet:"name=pr_id, type=INT64"`
	RepoID   int64  `parquet:"name=repo_id, type=INT64"`
	RepoName string `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8"`
	Text     string `parquet:"name=text, type=BYTE_ARRAY, convertedtype=UTF8"`
}

// Task 5: Tokenization Models

// TokenizedPRData is the final training record for one pull request: its
// identifiers, the token IDs, and size metadata.
//
// TokenIDs is written as a parquet LIST of INT32 values. TokenCount and
// ByteSize are stored as INT32; presumably they are the length of TokenIDs
// and the byte length of the tokenized text respectively — confirm against
// the code that fills them in.
type TokenizedPRData struct {
	// Identification
	RepoID   int64  `parquet:"name=repo_id, type=INT64"`
	RepoName string `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8"`
	PRID     int64  `parquet:"name=pr_id, type=INT64"`

	// Tokenized data
	TokenIDs []int32 `parquet:"name=token_ids, type=LIST, valuetype=INT32"`

	// Metadata
	TokenCount int32 `parquet:"name=token_count, type=INT32"`
	ByteSize   int32 `parquet:"name=byte_size, type=INT32"`
}

// TokenStatistics is the per-PR statistics record kept for every processed
// pull request. It repeats the identifiers and size metadata of
// TokenizedPRData without the token IDs, and adds the Discarded flag.
type TokenStatistics struct {
	RepoID     int64  `parquet:"name=repo_id, type=INT64"`
	RepoName   string `parquet:"name=repo_name, type=BYTE_ARRAY, convertedtype=UTF8"`
	PRID       int64  `parquet:"name=pr_id, type=INT64"`
	TokenCount int32  `parquet:"name=token_count, type=INT32"`
	ByteSize   int32  `parquet:"name=byte_size, type=INT32"`
	Discarded  bool   `parquet:"name=discarded, type=BOOLEAN"` // true if > 32k tokens
}

// API Response Types

// PublicRepoResponse represents the GitHub REST API response for
// GET /repos/{owner}/{repo}.
//
// CreatedAt is kept as the raw string from the payload; any parsing into a
// timestamp is left to the caller. License is a pointer to an inline object,
// so a missing or null "license" leaves it nil — callers must nil-check it
// before reading Key.
type PublicRepoResponse struct {
	ID              int64  `json:"id"`
	FullName        string `json:"full_name"`
	Description     string `json:"description"`
	DefaultBranch   string `json:"default_branch"`
	Language        string `json:"language"`
	StargazersCount int64  `json:"stargazers_count"`
	Archived        bool   `json:"archived"`
	CreatedAt       string `json:"created_at"`
	License         *struct {
		Key string `json:"key"`
	} `json:"license"`
}

// GraphQLClosingIssuesResponse represents the GraphQL response for a pull
// request's closing issues.
//
// The nested anonymous structs mirror the JSON path
// data.repository.pullRequest.closingIssuesReferences.nodes. Each node
// carries the issue number and database ID plus the owning repository's
// database ID, name and owner login. Errors holds the messages from the
// top-level "errors" array.
//
// NOTE(review): the DatabaseId fields do not follow the Go initialism
// convention (DatabaseID); they are left as-is because renaming an exported
// field would break existing callers.
type GraphQLClosingIssuesResponse struct {
	Data struct {
		Repository struct {
			PullRequest struct {
				ClosingIssuesReferences struct {
					Nodes []struct {
						Number     int64 `json:"number"`
						DatabaseId int64 `json:"databaseId"`
						Repository struct {
							DatabaseId int64  `json:"databaseId"`
							Name       string `json:"name"`
							Owner      struct {
								Login string `json:"login"`
							} `json:"owner"`
						} `json:"repository"`
					} `json:"nodes"`
				} `json:"closingIssuesReferences"`
			} `json:"pullRequest"`
		} `json:"repository"`
	} `json:"data"`
	Errors []struct {
		Message string `json:"message"`
	} `json:"errors"`
}

// IssueResponse is the JSON shape of a GitHub issue API response.
//
// CreatedAt is kept as the raw string from the payload; any parsing into a
// timestamp is left to the caller. User is an inline object carrying the
// author's login.
type IssueResponse struct {
	Number    int64  `json:"number"`
	Title     string `json:"title"`
	Body      string `json:"body"`
	CreatedAt string `json:"created_at"`
	User      struct {
		Login string `json:"login"`
	} `json:"user"`
}

// CommentResponse is the JSON shape of a GitHub issue comment API response.
//
// CreatedAt is kept as the raw string from the payload; any parsing into a
// timestamp is left to the caller. User is an inline object carrying the
// comment author's login.
type CommentResponse struct {
	Body      string `json:"body"`
	CreatedAt string `json:"created_at"`
	User      struct {
		Login string `json:"login"`
	} `json:"user"`
}

// FileContentResponse is the JSON shape of the GitHub file content API
// response: the file's path, its content, and the name of the encoding the
// content was delivered in. Decoding Content according to Encoding is left
// to the caller.
type FileContentResponse struct {
	Content  string `json:"content"`
	Encoding string `json:"encoding"`
	Path     string `json:"path"`
}

// CommitResponse is the JSON shape of a GitHub commit API response.
//
// The nested "commit" object supplies the author's name, the author date
// (kept as the raw string from the payload) and the commit message.
type CommitResponse struct {
	SHA    string `json:"sha"`
	Commit struct {
		Author struct {
			Name string `json:"name"`
			Date string `json:"date"`
		} `json:"author"`
		Message string `json:"message"`
	} `json:"commit"`
}

// CommitDetailResponse is the JSON shape of a detailed commit response that
// includes the changed files.
//
// It has the same SHA and "commit" fields as CommitResponse and adds Files
// (filename and patch text per changed file) and Parents (SHA and URL per
// parent commit). The author date is kept as the raw string from the payload.
type CommitDetailResponse struct {
	SHA    string `json:"sha"`
	Commit struct {
		Author struct {
			Name string `json:"name"`
			Date string `json:"date"`
		} `json:"author"`
		Message string `json:"message"`
	} `json:"commit"`
	Files []struct {
		Filename string `json:"filename"`
		Patch    string `json:"patch"`
	} `json:"files"`
	Parents []struct {
		SHA string `json:"sha"`
		URL string `json:"url"`
	} `json:"parents"`
}

// PRDetailResponse is the JSON shape of a pull request detail response,
// reduced to the PR number and the "base" object's SHA and ref.
type PRDetailResponse struct {
	Number int64 `json:"number"`
	Base   struct {
		SHA string `json:"sha"`
		Ref string `json:"ref"`
	} `json:"base"`
}

// GitTreeResponse is the JSON shape of the GitHub git tree API response: the
// tree's SHA and URL, its entries, and the "truncated" flag from the payload.
// NOTE(review): Truncated presumably signals an incomplete entry list —
// confirm how callers handle it.
type GitTreeResponse struct {
	SHA       string         `json:"sha"`
	URL       string         `json:"url"`
	Tree      []GitTreeEntry `json:"tree"`
	Truncated bool           `json:"truncated"`
}

// GitTreeEntry is the JSON shape of a single entry in a git tree.
//
// Size is tagged omitempty, so a zero size is left out of encoded JSON; on
// decoding, an absent "size" key leaves it at zero.
type GitTreeEntry struct {
	Path string `json:"path"`
	Mode string `json:"mode"`
	Type string `json:"type"`
	SHA  string `json:"sha"`
	Size int64  `json:"size,omitempty"`
	URL  string `json:"url"`
}
