package tasks

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/reposurvey/pipeline/client"
	"github.com/reposurvey/pipeline/config"
	"github.com/reposurvey/pipeline/models"
	"github.com/reposurvey/pipeline/parquet"
)

// repoIDJob is one unit of Phase A work: a contiguous span of repository IDs
// that a single producer walks through.
type repoIDJob struct {
	// startID is where the span begins; endID is its exclusive upper bound
	// (a repo whose ID is >= endID belongs to a later job).
	startID, endID int64
}

// throughputMonitor tracks and reports pipeline throughput.
//
// Fields:
//   - mu is taken by addFetched, addFiltered and report before they touch
//     any of the counters below.
//   - reposFetched is the running total passed to addFetched.
//   - reposFiltered is the running total passed to addFiltered.
//   - startTime is set once by newThroughputMonitor; report divides the
//     totals by the time elapsed since then to get per-second rates.
//   - client is queried by report (GetRequestCount) for the API request total.
//   - lastAPICount remembers the request total seen by the previous report so
//     the next one can print the delta.
type throughputMonitor struct {
	mu            sync.Mutex
	reposFetched  int64
	reposFiltered int64
	startTime     time.Time
	client        *client.GithubClient
	lastAPICount  uint64
}

// newThroughputMonitor returns a monitor whose clock starts now and whose
// counters (including the last-seen API request count) are all zero.
func newThroughputMonitor(client *client.GithubClient) *throughputMonitor {
	mon := new(throughputMonitor)
	mon.client = client
	mon.startTime = time.Now()
	return mon
}

// addFetched adds count to the fetched-repositories total under the monitor lock.
func (m *throughputMonitor) addFetched(count int) {
	delta := int64(count)

	m.mu.Lock()
	m.reposFetched += delta
	m.mu.Unlock()
}

// addFiltered adds count to the filtered-repositories total under the monitor lock.
func (m *throughputMonitor) addFiltered(count int) {
	delta := int64(count)

	m.mu.Lock()
	m.reposFiltered += delta
	m.mu.Unlock()
}

// report prints one [THROUGHPUT] line with the cumulative fetched/filtered
// totals, the API request total, their average per-second rates since the
// monitor was created, and the number of API requests made since the previous
// report.
//
// The counters are snapshotted under the lock and the line is printed after
// the lock is released, so goroutines calling addFetched/addFiltered are never
// blocked behind a stdout write.
func (m *throughputMonitor) report() {
	m.mu.Lock()
	elapsed := time.Since(m.startTime).Seconds()
	fetched := m.reposFetched
	filtered := m.reposFiltered

	// Read the request counter and advance lastAPICount inside the same
	// critical section so overlapping reports cannot compute a delta against
	// a newer baseline.
	currentAPICount := m.client.GetRequestCount()
	apiRequestsSinceLastReport := currentAPICount - m.lastAPICount
	m.lastAPICount = currentAPICount
	m.mu.Unlock()

	// Rates stay at zero when no measurable time has passed; dividing by a
	// zero elapsed time would print NaN or +Inf.
	var fetchRate, filterRate, apiRate float64
	if elapsed > 0 {
		fetchRate = float64(fetched) / elapsed
		filterRate = float64(filtered) / elapsed
		apiRate = float64(currentAPICount) / elapsed
	}

	fmt.Printf("[THROUGHPUT] Fetched: %d (%.1f/s) | Filtered: %d (%.1f/s) | API Requests: %d (%.1f/s, +%d) | Elapsed: %.1fs\n",
		fetched, fetchRate, filtered, filterRate, currentAPICount, apiRate, apiRequestsSinceLastReport, elapsed)
}

// Task1RepoSurvey implements Phase A (Iterator) and Phase B (Enrichment/Filter).
//
// Phase A producers pull ID-range jobs from jobQueue, list repositories in
// each range, write every listed repo to rawIndexWriter and forward it on
// repoChan. Phase B workers read repoChan, fetch full details for each repo,
// apply the configured filters and write survivors to filteredWriter.
type Task1RepoSurvey struct {
	cfg    *config.Config
	client *client.GithubClient

	// Phase A: writer for every repository returned by the listing endpoint
	rawIndexWriter *parquet.BatchWriter[models.RawRepoIndex]

	// Phase B: writer for repositories that pass all filters
	filteredWriter *parquet.BatchWriter[models.PublicRepo]

	// Channel for piping repos from Phase A to Phase B
	// (created with a buffer of cfg.MaxConcurrency*2 by NewTask1RepoSurvey)
	repoChan chan models.RawRepoIndex

	// Job queue for Phase A producers (each job is a 10k ID range);
	// filled by populateJobQueue and buffered for 10 jobs per producer
	jobQueue chan repoIDJob

	// Number of Phase A producers (cfg.MaxConcurrency/10, but at least 4)
	numProducers int

	// Throughput monitoring: counters updated by both phases, printed by
	// runThroughputMonitor
	monitor *throughputMonitor
}

// NewTask1RepoSurvey creates a new repository survey task.
//
// It opens the raw-index and filtered-repos parquet writers from the
// directories and batching settings in cfg, sizes the Phase A producer pool
// at 10% of cfg.MaxConcurrency (minimum 4), and allocates the job queue and
// the Phase A -> Phase B channel.
//
// An error is returned when cfg.MaxConcurrency is not positive or when either
// writer cannot be created. If the second writer fails, the first is closed
// before returning so it is not leaked.
func NewTask1RepoSurvey(cfg *config.Config, client *client.GithubClient) (*Task1RepoSurvey, error) {
	// A zero value would start no Phase B workers, leaving producers blocked
	// on repoChan forever; a negative value would make the channel
	// allocation below panic. Reject both before opening any writer.
	if cfg.MaxConcurrency <= 0 {
		return nil, fmt.Errorf("invalid MaxConcurrency %d: must be positive", cfg.MaxConcurrency)
	}

	// Create raw index writer
	rawIndexWriter, err := parquet.NewBatchWriter[models.RawRepoIndex](
		cfg.RawIndexDir,
		cfg.BatchSize,
		cfg.MaxFileSize,
		cfg.FlushInterval,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create raw index writer: %w", err)
	}

	// Create filtered repos writer
	filteredWriter, err := parquet.NewBatchWriter[models.PublicRepo](
		cfg.FilteredReposDir,
		cfg.BatchSize,
		cfg.MaxFileSize,
		cfg.FlushInterval,
	)
	if err != nil {
		// Release the writer that was already opened, and surface a failure
		// to do so instead of dropping it.
		if closeErr := rawIndexWriter.Close(); closeErr != nil {
			return nil, fmt.Errorf("failed to create filtered writer: %w (also failed to close raw index writer: %v)", err, closeErr)
		}
		return nil, fmt.Errorf("failed to create filtered writer: %w", err)
	}

	// Calculate number of producers (use 10% of max concurrency, minimum 4)
	numProducers := cfg.MaxConcurrency / 10
	if numProducers < 4 {
		numProducers = 4
	}

	return &Task1RepoSurvey{
		cfg:            cfg,
		client:         client,
		rawIndexWriter: rawIndexWriter,
		filteredWriter: filteredWriter,
		repoChan:       make(chan models.RawRepoIndex, cfg.MaxConcurrency*2),
		jobQueue:       make(chan repoIDJob, numProducers*10), // Buffer for 10 jobs per producer
		numProducers:   numProducers,
		monitor:        newThroughputMonitor(client),
	}, nil
}

// Run starts both Phase A (multiple producers) and Phase B (enrichment)
// concurrently and blocks until both have fully stopped.
//
// Goroutines started:
//   - the throughput monitor (stopped and awaited when Run returns),
//   - the Phase B worker pool,
//   - the job queue populator,
//   - t.numProducers Phase A producers.
//
// When every producer has returned, repoChan is closed so Phase B can drain
// and finish. Run returns the first error reported by a producer, otherwise
// the Phase B error, otherwise nil. It does not return until all producers
// and all Phase B workers have exited, so the caller can safely call Close
// on the task afterwards without racing in-flight writes.
func (t *Task1RepoSurvey) Run(ctx context.Context) error {
	sinceID := t.cfg.SinceID

	fmt.Printf("[INFO] Starting Task 1: Repository Survey from ID %d with %d producers\n", sinceID, t.numProducers)

	// Start throughput monitor. On exit, cancel it and wait for its final
	// report so that the last summary line is printed before Run returns.
	monitorCtx, monitorCancel := context.WithCancel(ctx)
	monitorDone := make(chan struct{})
	go func() {
		defer close(monitorDone)
		t.runThroughputMonitor(monitorCtx)
	}()
	defer func() {
		monitorCancel()
		<-monitorDone
	}()

	// Start Phase B workers (enrichment)
	phaseBDone := make(chan error, 1)
	go func() {
		defer close(phaseBDone)
		if err := t.runPhaseB(ctx); err != nil {
			phaseBDone <- fmt.Errorf("Phase B error: %w", err)
		}
	}()

	// Start job queue populator
	go t.populateJobQueue(ctx, sinceID)

	// Start multiple Phase A producers. The error channel has one slot per
	// producer, so a producer never blocks when reporting its error.
	var wg sync.WaitGroup
	phaseAErrors := make(chan error, t.numProducers)

	for i := 0; i < t.numProducers; i++ {
		wg.Add(1)
		go func(producerID int) {
			defer wg.Done()
			if err := t.runPhaseAProducer(ctx, producerID); err != nil {
				phaseAErrors <- fmt.Errorf("Producer %d error: %w", producerID, err)
			}
		}(i)
	}

	// Once all producers are done, nothing else will send on repoChan.
	go func() {
		wg.Wait()
		close(t.repoChan) // Signal Phase B that no more repos will be sent
		close(phaseAErrors)
	}()

	// Drain every Phase A result instead of returning on the first one: the
	// range only ends after all producers have exited and repoChan is closed.
	var firstErr error
	for err := range phaseAErrors {
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}

	// Always wait for Phase B, even when Phase A failed, so no worker is
	// still writing when Run returns.
	if err := <-phaseBDone; err != nil && firstErr == nil {
		firstErr = err
	}

	return firstErr
}

// runThroughputMonitor prints a throughput report every 30 seconds until ctx
// is done, then prints one last report and returns.
func (t *Task1RepoSurvey) runThroughputMonitor(ctx context.Context) {
	const reportEvery = 30 * time.Second

	tick := time.NewTicker(reportEvery)
	defer tick.Stop()

	for stopped := false; !stopped; {
		select {
		case <-tick.C:
			// Periodic report.
		case <-ctx.Done():
			// Emit the final report below, then leave the loop.
			stopped = true
		}
		t.monitor.report()
	}
}

// populateJobQueue feeds the job queue with consecutive 10k-wide ID ranges,
// beginning at startID, for as long as ctx is live. The queue is closed when
// the function returns, which lets producers ranging over it terminate.
func (t *Task1RepoSurvey) populateJobQueue(ctx context.Context, startID int64) {
	const jobRangeSize = 10000 // width of each job in repo IDs

	defer close(t.jobQueue)

	for next := startID; ctx.Err() == nil; {
		select {
		case t.jobQueue <- repoIDJob{startID: next, endID: next + jobRangeSize}:
			// Only advance once the job was actually handed over.
			next += jobRangeSize
		case <-ctx.Done():
			return
		}
	}
}

// runPhaseAProducer is the body of one Phase A producer goroutine. It takes
// jobs off the job queue until the queue is closed (returns nil) or ctx is
// done (returns ctx.Err()). A job that fails is logged and skipped; it never
// stops the producer.
func (t *Task1RepoSurvey) runPhaseAProducer(ctx context.Context, producerID int) error {
	// Only the first producer announces the pool, so the line appears once.
	if producerID == 0 {
		fmt.Printf("[INFO] Starting %d producers with job queue (10k ID range per job)\n", t.numProducers)
	}

	for {
		job, open := <-t.jobQueue
		if !open {
			return nil
		}
		if err := ctx.Err(); err != nil {
			return err
		}

		err := t.processJobRange(ctx, job, producerID)
		if err == nil || errors.Is(err, context.Canceled) {
			// Success, or a cancellation that the next iteration will pick up.
			continue
		}
		fmt.Printf("[ERROR] Producer %d failed to process job range [%d-%d]: %v\n",
			producerID, job.startID, job.endID, err)
	}
}

// processJobRange fetches all repos whose ID lies in [job.startID, job.endID),
// writes each one to the raw index and forwards it to Phase B.
//
// Paging uses the listing endpoint's `since` cursor, which is exclusive: only
// repositories with an ID greater than `since` are returned. The cursor
// therefore starts at job.startID-1 and is advanced to the last ID seen,
// never past it, so no ID inside the range is skipped. As a consequence the
// very first job also lists the repo whose ID equals its startID.
//
// Fetch and parse failures are logged and retried after a 5 second pause that
// is interrupted by ctx cancellation. Raw-index write failures are logged and
// do not stop processing.
//
// Returns nil when the range is exhausted (an empty page, or a repo at or
// beyond job.endID was seen) and ctx.Err() when ctx is cancelled.
func (t *Task1RepoSurvey) processJobRange(ctx context.Context, job repoIDJob, producerID int) error {
	const retryDelay = 5 * time.Second

	// pause waits for retryDelay, or returns ctx.Err() as soon as ctx is done,
	// so a retry back-off never holds up shutdown.
	pause := func() error {
		timer := time.NewTimer(retryDelay)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	// `since` is exclusive, so start one below the first wanted ID.
	since := job.startID - 1
	if since < 0 {
		since = 0
	}

	// Keep going while at least one ID in (since, job.endID) can still exist.
	for since < job.endID-1 {
		if err := ctx.Err(); err != nil {
			return err
		}

		// Fetch batch of repositories
		endpoint := fmt.Sprintf("/repositories?since=%d", since)
		data, err := t.client.Get(ctx, endpoint)
		if err != nil {
			// Don't log context canceled errors during shutdown
			if !errors.Is(err, context.Canceled) {
				fmt.Printf("[ERROR] Producer %d failed to fetch repositories: %v\n", producerID, err)
			}
			if err := pause(); err != nil {
				return err
			}
			continue
		}

		// Parse response
		var repos []models.RawRepoIndex
		if err := json.Unmarshal(data, &repos); err != nil {
			fmt.Printf("[ERROR] Producer %d failed to parse repositories: %v\n", producerID, err)
			if err := pause(); err != nil {
				return err
			}
			continue
		}

		if len(repos) == 0 {
			// No more repos in this range, move to next job
			break
		}

		// Process each repository
		processedCount := 0
		lastID := since
		for _, repo := range repos {
			// Only process repos within this job's range
			if repo.RepoID >= job.endID {
				// Reached end of this job's range; account for what was
				// forwarded from this page before leaving.
				t.monitor.addFetched(processedCount)
				return nil
			}

			// Write to raw index (best effort: a failure is logged only)
			if err := t.rawIndexWriter.Write(repo); err != nil {
				fmt.Printf("[ERROR] Producer %d failed to write raw index: %v\n", producerID, err)
			}

			// Send to Phase B for enrichment (no deduplication needed since ranges don't overlap)
			select {
			case t.repoChan <- repo:
				processedCount++
			case <-ctx.Done():
				t.monitor.addFetched(processedCount)
				return ctx.Err()
			}

			lastID = repo.RepoID
		}

		// Count only the repos actually forwarded, not the whole page.
		t.monitor.addFetched(processedCount)

		// Advance the cursor to the last ID seen. If a page somehow failed to
		// move past the cursor, step by one so the loop still terminates.
		if lastID > since {
			since = lastID
		} else {
			since++
		}
	}

	return nil
}

// runPhaseB runs the enrichment/filter worker pool: cfg.MaxConcurrency
// goroutines each consume repoChan until it is closed. Per-repo failures are
// logged (except context cancellation) and never abort a worker. The function
// returns nil once every worker has finished.
func (t *Task1RepoSurvey) runPhaseB(ctx context.Context) error {
	poolSize := t.cfg.MaxConcurrency

	// One slot per worker so the completion signal never blocks.
	finished := make(chan struct{}, poolSize)

	worker := func(id int) {
		for item := range t.repoChan {
			err := t.enrichAndFilter(ctx, item)
			if err == nil || errors.Is(err, context.Canceled) {
				// Nothing to report; cancellation noise is suppressed during shutdown.
				continue
			}
			fmt.Printf("[ERROR] Worker %d failed to process repo %s: %v\n", id, item.FullName, err)
		}
		finished <- struct{}{}
	}

	for id := 0; id < poolSize; id++ {
		go worker(id)
	}

	// Collect one completion signal per worker.
	for remaining := poolSize; remaining > 0; remaining-- {
		<-finished
	}

	return nil
}

// enrichAndFilter looks up the full record for rawRepo, and if it passes the
// configured filters, writes it to the filtered output and bumps the filtered
// counter.
//
// Repos whose lookup yields client.ErrNotFound or
// client.ErrUnavailableForLegalReasons are skipped silently (nil is returned).
// Any other fetch error, a JSON decode error, or a write error is returned
// wrapped. A repo that fails a filter is discarded and nil is returned.
func (t *Task1RepoSurvey) enrichAndFilter(ctx context.Context, rawRepo models.RawRepoIndex) error {
	// Fetch full repository details
	data, err := t.client.Get(ctx, fmt.Sprintf("/repos/%s", rawRepo.FullName))
	if err != nil {
		switch {
		case errors.Is(err, client.ErrNotFound):
			// 404: access blocked by GitHub (remapped from 403) - skip quietly
			return nil
		case errors.Is(err, client.ErrUnavailableForLegalReasons):
			// 451: DMCA takedown or legal block - skip quietly
			return nil
		default:
			return fmt.Errorf("failed to fetch repo details: %w", err)
		}
	}

	// Decode the API payload (its schema differs from the parquet schema)
	var details models.PublicRepoResponse
	if err := json.Unmarshal(data, &details); err != nil {
		return fmt.Errorf("failed to parse repo details: %w", err)
	}

	// Translate into the parquet output record
	var out models.PublicRepo
	out.RepoID = details.ID
	out.FullName = details.FullName
	out.Description = details.Description
	out.DefaultBranch = details.DefaultBranch
	out.Language = details.Language
	out.StargazersCount = details.StargazersCount
	out.Archived = details.Archived
	if lic := details.License; lic != nil {
		out.LicenseKey = lic.Key
	}

	// Discard unless: language matches the configured target, the star count
	// reaches the configured minimum, and the repo is not archived.
	wrongLanguage := out.Language != t.cfg.TargetLanguage
	tooFewStars := out.StargazersCount < int64(t.cfg.MinStars)
	if wrongLanguage || tooFewStars || out.Archived {
		return nil
	}

	// created_at is optional; an unparsable value leaves CreatedAt at zero.
	if details.CreatedAt != "" {
		created, parseErr := time.Parse(time.RFC3339, details.CreatedAt)
		if parseErr == nil {
			out.CreatedAt = created.UnixMilli()
		}
	}

	// Persist the survivor
	if err := t.filteredWriter.Write(out); err != nil {
		return fmt.Errorf("failed to write filtered repo: %w", err)
	}

	t.monitor.addFiltered(1)
	return nil
}

// Close shuts down the raw index writer and then the filtered writer. Both
// are always attempted; if either fails, a single error listing every failure
// is returned.
func (t *Task1RepoSurvey) Close() error {
	var failures []error

	// record keeps a wrapped copy of err when closing the named writer failed.
	record := func(what string, err error) {
		if err == nil {
			return
		}
		failures = append(failures, fmt.Errorf("failed to close %s: %w", what, err))
	}

	record("raw index writer", t.rawIndexWriter.Close())
	record("filtered writer", t.filteredWriter.Close())

	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("errors closing Task1: %v", failures)
}
