// auction_matching.cu

#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/device_vector.h>
#include <thrust/fill.h>
#include <thrust/host_vector.h>
#include <cfloat> // Include this header for FLT_MAX
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

#define BLOCK_SIZE 256

// Bidding phase of the auction: each bidding worker picks its most profitable
// task and computes the price increment it is willing to offer for it.
//
// Launch layout: 1D grid, one thread per entry of unassignedWorkers (at least
// numUnassigned threads in total; surplus threads exit immediately). No shared
// memory is used.
//
// Parameters:
//   n                 - number of tasks; also the row length of C.
//   unassignedWorkers - indices of the workers that bid this round; the first
//                       numUnassigned entries are read.
//   numUnassigned     - number of valid entries in unassignedWorkers.
//   C                 - cost matrix, indexed as C[worker * n + task].
//   prices            - current price of each task (n entries).
//   epsilon           - increment added on top of the best/second-best gap.
//   bids              - out: bids[worker] = task the worker bids on
//                       (-1 if no task beat the -FLT_MAX sentinel).
//   bidValues         - out: bidValues[worker] = offered price increment.
//   bestTasks         - out: same task index as written to bids[worker].
__global__ void auctionBidding(
    int n,
    int* unassignedWorkers,
    int numUnassigned,
    const float* C,
    const float* prices,
    float epsilon,
    int* bids,
    float* bidValues,
    int* bestTasks
) {
    int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= numUnassigned) {
        return;
    }

    int worker = unassignedWorkers[idx];

    // Compute the row offset in 64 bits: worker * n overflows a 32-bit int
    // once n exceeds roughly 46k.
    const float* costRow = C + static_cast<size_t>(worker) * static_cast<size_t>(n);

    // Track the best and second-best net profit (negated cost minus price).
    float maxProfit = -FLT_MAX;
    float secondMaxProfit = -FLT_MAX;
    int bestTask = -1;

    for (int j = 0; j < n; ++j) {
        float profit = -costRow[j] - prices[j];
        if (profit > maxProfit) {
            secondMaxProfit = maxProfit;
            maxProfit = profit;
            bestTask = j;
        } else if (profit > secondMaxProfit) {
            secondMaxProfit = profit;
        }
    }

    // When no second candidate exists (e.g. a single task) the sentinel must
    // not leak into the bid; otherwise the increment would be about FLT_MAX
    // and blow the task price up. Fall back to a gap of zero, so the worker
    // raises the price by epsilon only.
    if (secondMaxProfit == -FLT_MAX) {
        secondMaxProfit = maxProfit;
    }

    // Bid increment: the gap to the runner-up plus epsilon.
    float bidValue = maxProfit - secondMaxProfit + epsilon;
    bids[worker] = bestTask;
    bidValues[worker] = bidValue;
    bestTasks[worker] = bestTask;
}

// Assignment phase of the auction: each task is awarded to its highest bidder.
//
// Launch layout: 1D grid, one thread per task (at least n threads in total;
// surplus threads exit immediately). No shared memory and no atomics are used:
// every task is resolved by exactly one thread, so the price is raised once
// by the winning increment and the result is deterministic.
//
// Parameters:
//   n            - number of workers and of tasks.
//   bids         - bids[worker] = task the worker bids on, or -1 for no bid.
//   bidValues    - bidValues[worker] = price increment offered by the worker;
//                  only read where bids[worker] names this thread's task.
//   workerToTask - in/out: task held by each worker, -1 if unassigned.
//   taskToWorker - in/out: worker holding each task, -1 if unassigned.
//   prices       - in/out: price of each task; raised by the winning bid.
//
// Assumes each bidding worker is currently unassigned, which is how the host
// loop in this file drives the kernel; under that assumption no two threads
// write the same workerToTask entry.
__global__ void auctionAssignment(
    int n,
    int* bids,
    float* bidValues,
    int* workerToTask,
    int* taskToWorker,
    float* prices
) {
    int task = blockDim.x * blockIdx.x + threadIdx.x;
    if (task >= n) {
        return;
    }

    // Find the highest bid placed on this task. Ties go to the lowest worker
    // index because only a strictly larger bid replaces the current winner.
    int winner = -1;
    float winningBid = 0.0f;
    for (int worker = 0; worker < n; ++worker) {
        if (bids[worker] != task) {
            continue;
        }
        float offered = bidValues[worker];
        if (winner == -1 || offered > winningBid) {
            winner = worker;
            winningBid = offered;
        }
    }

    // Nobody bid on this task this round: leave price and owner untouched.
    if (winner == -1) {
        return;
    }

    // Raise the price by the winning increment only (not by the sum of all bids).
    prices[task] += winningBid;

    // Evict the previous owner, then record the new pairing in both directions.
    int prevWorker = taskToWorker[task];
    if (prevWorker != -1 && prevWorker != winner) {
        workerToTask[prevWorker] = -1;
    }
    taskToWorker[task] = winner;
    workerToTask[winner] = task;
}

// Throws std::runtime_error naming `what` when a CUDA runtime call has failed.
static void throwOnCudaError(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        throw std::runtime_error(std::string(what) + ": " + cudaGetErrorString(status));
    }
}

// Runs the GPU auction loop (bidding kernel, then assignment kernel) until
// every worker holds a task, then writes assignment[worker] = task.
//
// Parameters:
//   costMatrix - n * n costs, read by the bidding kernel as
//                costMatrix[worker * n + task].
//   n          - number of workers and of tasks.
//   assignment - out: resized to n; assignment[worker] is the task index.
//
// Throws:
//   std::invalid_argument - n is negative or costMatrix.size() != n * n.
//   std::runtime_error    - a kernel launch or device synchronization failed.
//
// NOTE(review): the loop ends only when no worker is left unassigned; a worker
// that can never place a bid (e.g. a row of NaN costs) would keep it running —
// confirm inputs are finite.
void auctionAlgorithmCUDA(const std::vector<float>& costMatrix, int n, std::vector<int>& assignment) {
    if (n < 0) {
        throw std::invalid_argument("auctionAlgorithmCUDA: n must be non-negative");
    }
    const size_t expectedSize = static_cast<size_t>(n) * static_cast<size_t>(n);
    if (costMatrix.size() != expectedSize) {
        // A short matrix would make the bidding kernel read out of bounds.
        throw std::invalid_argument("auctionAlgorithmCUDA: costMatrix must hold n * n entries");
    }
    if (n == 0) {
        assignment.clear();
        return;
    }

    // Device vectors
    thrust::device_vector<float> d_C(costMatrix);
    thrust::device_vector<float> d_prices(n, 0.0f);
    thrust::device_vector<int> d_workerToTask(n, -1);
    thrust::device_vector<int> d_taskToWorker(n, -1);

    // Epsilon calculation
    float epsilon = 1.0f / (n + 1);

    // Initially every worker is unassigned.
    thrust::host_vector<int> h_unassignedWorkers(n);
    for (int i = 0; i < n; ++i) {
        h_unassignedWorkers[i] = i;
    }
    thrust::device_vector<int> d_unassignedWorkers = h_unassignedWorkers;

    // Per-round bid buffers, indexed by worker. Only d_bids needs resetting
    // between rounds: bid values are read only where a bid was placed.
    thrust::device_vector<int> d_bids(n, -1);
    thrust::device_vector<float> d_bidValues(n, 0.0f);
    thrust::device_vector<int> d_bestTasks(n, -1);

    // The assignment kernel always covers all n indices.
    const int assignGrid = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;

    int numUnassigned = n;
    while (numUnassigned > 0) {
        const int bidGrid = (numUnassigned + BLOCK_SIZE - 1) / BLOCK_SIZE;

        // Bidding phase
        auctionBidding<<<bidGrid, BLOCK_SIZE>>>(
            n,
            thrust::raw_pointer_cast(d_unassignedWorkers.data()),
            numUnassigned,
            thrust::raw_pointer_cast(d_C.data()),
            thrust::raw_pointer_cast(d_prices.data()),
            epsilon,
            thrust::raw_pointer_cast(d_bids.data()),
            thrust::raw_pointer_cast(d_bidValues.data()),
            thrust::raw_pointer_cast(d_bestTasks.data())
        );
        throwOnCudaError(cudaGetLastError(), "auctionBidding launch failed");

        // Assignment phase. Both kernels go to the same (default) stream, so
        // this launch is ordered after the bidding kernel without a host sync.
        auctionAssignment<<<assignGrid, BLOCK_SIZE>>>(
            n,
            thrust::raw_pointer_cast(d_bids.data()),
            thrust::raw_pointer_cast(d_bidValues.data()),
            thrust::raw_pointer_cast(d_workerToTask.data()),
            thrust::raw_pointer_cast(d_taskToWorker.data()),
            thrust::raw_pointer_cast(d_prices.data())
        );
        throwOnCudaError(cudaGetLastError(), "auctionAssignment launch failed");

        // Surface execution errors here: a silently failed round would leave
        // every worker unassigned and this loop would never terminate.
        throwOnCudaError(cudaDeviceSynchronize(), "auction round failed");

        // Rebuild the list of workers that still need a task.
        thrust::host_vector<int> h_workerToTask = d_workerToTask;
        h_unassignedWorkers.clear();
        for (int i = 0; i < n; ++i) {
            if (h_workerToTask[i] == -1) {
                h_unassignedWorkers.push_back(i);
            }
        }
        numUnassigned = static_cast<int>(h_unassignedWorkers.size());
        d_unassignedWorkers = h_unassignedWorkers;

        // Clear this round's bids so assigned workers do not bid again.
        thrust::fill(d_bids.begin(), d_bids.end(), -1);
    }

    // Copy results back to host
    thrust::host_vector<int> h_workerToTask = d_workerToTask;
    assignment.assign(h_workerToTask.begin(), h_workerToTask.end());
}

// Demo driver: solves a 4x4 assignment problem and prints the matched pairs.
// Returns 0 on success, 1 if the solver throws.
int main() {
    // Define a cost matrix (row = worker, column = task).
    const int n = 4;
    std::vector<float> costMatrix = {
        90, 75, 75, 80,
        35, 85, 55, 65,
        125, 95, 90, 105,
        45, 110, 95, 115
    };

    // Costs are passed as-is. The bidding kernel already computes profit as
    // -C - price, so negating the matrix here as well would make the auction
    // maximise total cost instead of minimising it.

    // Solve the assignment problem
    std::vector<int> assignment;
    try {
        auctionAlgorithmCUDA(costMatrix, n, assignment);
    } catch (const std::exception& e) {
        std::cerr << "Auction failed: " << e.what() << std::endl;
        return 1;
    }

    // Print the matched pairs
    std::cout << "Matched pairs (worker, task):" << std::endl;
    for (int i = 0; i < n; ++i) {
        std::cout << "Worker " << i << " assigned to Task " << assignment[i] << std::endl;
    }

    return 0;
}
