package parquet

import (
	"fmt"
	"path/filepath"

	"github.com/xitongsys/parquet-go-source/local"
	"github.com/xitongsys/parquet-go/reader"
)

// BatchReader provides batch-based reading of parquet files.
//
// It walks a fixed list of file paths, keeps the rows of one file in memory
// at a time, and hands them out in slices of at most batchSize records.
// T is the row type that file contents are decoded into.
type BatchReader[T any] struct {
	files         []string // parquet file paths to read, in order
	currentFile   int      // index into files of the next file to load
	batchSize     int      // maximum number of records per batch
	currentData   []T      // rows of the most recently loaded file
	currentOffset int      // index into currentData of the next unreturned row
}

// NewBatchReader creates a batch reader for the given row type T.
//
// It collects every path matching "*.parquet" directly inside inputDir
// (subdirectories are not searched) and will serve their rows in batches of
// at most batchSize records. No file is opened here; files are read lazily by
// ReadBatch.
//
// An error is returned when batchSize is not positive, when the glob pattern
// built from inputDir is malformed, or when no matching file exists.
func NewBatchReader[T any](inputDir string, batchSize int) (*BatchReader[T], error) {
	// A zero batch size would make ReadBatch return empty batches forever and
	// a negative one would produce an invalid slice range, so reject both
	// before any reader exists.
	if batchSize <= 0 {
		return nil, fmt.Errorf("invalid batch size %d: must be positive", batchSize)
	}

	// Find all parquet files in the input directory.
	pattern := filepath.Join(inputDir, "*.parquet")
	files, err := filepath.Glob(pattern)
	if err != nil {
		return nil, fmt.Errorf("failed to glob parquet files: %w", err)
	}

	if len(files) == 0 {
		return nil, fmt.Errorf("no parquet files found in %s", inputDir)
	}

	return &BatchReader[T]{
		files:       files,
		currentFile: 0,
		batchSize:   batchSize,
	}, nil
}

// ReadBatch returns the next batch of at most batchSize records.
//
// Files are loaded one at a time, each entirely into memory, and then handed
// out in slices; a batch never spans two files. Files that contain no rows
// are skipped so that an empty batch is never returned while data remains.
//
// The returned slice shares storage with the reader's buffer for the current
// file, but its capacity is capped at its length, so appending to it cannot
// overwrite records that have not been returned yet.
//
// The boolean reports whether another call may yield data: it is true while
// the current file has unread rows or unopened files remain. Because files
// are only inspected when loaded, it can be true even though every remaining
// file is empty; the next call then returns (nil, false, nil).
//
// An error is returned when the reader's batch size is not positive or when
// a file cannot be read. On a read error the file cursor is not advanced, so
// a subsequent call retries the same file.
func (br *BatchReader[T]) ReadBatch() ([]T, bool, error) {
	// Guard against a reader built without a valid batch size: zero would
	// loop forever on empty batches and a negative value would panic below.
	if br.batchSize <= 0 {
		return nil, false, fmt.Errorf("invalid batch size %d: must be positive", br.batchSize)
	}

	// Load files until one provides unread rows, skipping empty ones.
	for br.currentOffset >= len(br.currentData) {
		if br.currentFile >= len(br.files) {
			return nil, false, nil // No more files
		}

		name := br.files[br.currentFile]
		data, err := br.readEntireFile(name)
		if err != nil {
			return nil, false, fmt.Errorf("failed to read file %s: %w", name, err)
		}

		br.currentData = data
		br.currentOffset = 0
		br.currentFile++
	}

	// Compute the batch bounds by comparing against the remaining row count
	// rather than adding first, so a very large batch size cannot overflow.
	start := br.currentOffset
	end := len(br.currentData)
	if br.batchSize < end-start {
		end = start + br.batchSize
	}

	// The three-index slice caps capacity at the batch length, detaching the
	// caller's appends from the rows that follow in the shared buffer.
	batch := br.currentData[start:end:end]
	br.currentOffset = end

	hasMore := end < len(br.currentData) || br.currentFile < len(br.files)

	return batch, hasMore, nil
}

// readEntireFile opens the parquet file at filename and decodes all of its
// rows into a freshly allocated []T sized from the reader's reported row
// count. The file handle is closed and the parquet reader stopped before
// returning, whether or not decoding succeeded.
func (br *BatchReader[T]) readEntireFile(filename string) ([]T, error) {
	src, openErr := local.NewLocalFileReader(filename)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open file: %w", openErr)
	}
	defer src.Close()

	// The literal 4 is forwarded to the library unchanged; presumably it is
	// the reader's parallelism setting — confirm against parquet-go.
	pq, initErr := reader.NewParquetReader(src, new(T), 4)
	if initErr != nil {
		return nil, fmt.Errorf("failed to create parquet reader: %w", initErr)
	}
	defer pq.ReadStop()

	rows := make([]T, int(pq.GetNumRows()))
	readErr := pq.Read(&rows)
	if readErr != nil {
		return nil, fmt.Errorf("failed to read rows: %w", readErr)
	}

	return rows, nil
}

// GetFileCount reports the length of the reader's list of parquet files.
func (br *BatchReader[T]) GetFileCount() int {
	total := len(br.files)
	return total
}

// GetCurrentFileIndex returns the reader's file cursor.
//
// ReadBatch advances the cursor immediately after a file has been loaded, so
// the value equals the number of files loaded so far, which is one past the
// index of the file whose rows are currently being served (0 before the
// first read).
func (br *BatchReader[T]) GetCurrentFileIndex() int {
	cursor := br.currentFile
	return cursor
}

// ReadAll gathers every record from every parquet file in inputDir into a
// single slice. It drives a BatchReader with a batch size of 10000 and
// concatenates the batches in the order they are produced. The first error
// from creating the reader or reading a batch aborts the operation and is
// returned unchanged with a nil slice.
func ReadAll[T any](inputDir string) ([]T, error) {
	rdr, err := NewBatchReader[T](inputDir, 10000)
	if err != nil {
		return nil, err
	}

	var out []T
	// Always read at least once; keep going while the reader reports more.
	for more := true; more; {
		var chunk []T
		chunk, more, err = rdr.ReadBatch()
		if err != nil {
			return nil, err
		}
		out = append(out, chunk...)
	}

	return out, nil
}
