/**
 * Copyright (c) 2020  xxx Inc.
 * File              : rgb.cu
 * Author            : 
 * Date              : 2020-05-07
 * Last Modified Date: 2020-05-07
 * Last Modified By  : 
 */
#include "rgb.h"

namespace vf {

/**
 * Pixel-format conversion kernel: each thread handles one destination pixel
 * of one destination image.
 *
 * Launch layout expected by the indexing below:
 *   - blockIdx.z selects the destination image (index into dsts / mparams);
 *   - the global x / y thread index selects the pixel column / row inside
 *     that image.
 * Threads that fall outside the selected destination image (grid tail) exit
 * without doing any work. No shared memory is used.
 *
 * @param src     descriptor of the source image, forwarded to RGB2RGBCvtFunc
 * @param dst_N   number of valid entries in dsts (and mparams)
 * @param mparams per-destination coordinate mapping; entry dst_idx is
 *                forwarded to RGB2RGBCvtFunc
 * @param dsts    array of destination image descriptors, dereferenced on the
 *                device
 * @param swap    forwarded unchanged to RGB2RGBCvtFunc
 */
template <typename T1, typename T2, int sPixFmt, int dPixFmt>
__global__ void RGB2RGBKernel(const PixelDescr<T1>* src, int dst_N,
                              const CoordMapParam* mparams,
                              const PixelDescr<T2>** dsts, bool swap) {
  const int dst_idx = blockIdx.z;
  // Validate the image index BEFORE indexing dsts[]; checking it after the
  // load would make the range check useless as a guard.
  if (dst_idx >= dst_N) return;

  const PixelDescr<T2>* dst = dsts[dst_idx];

  // Global pixel coordinates of this thread inside the destination image.
  const int dx = blockIdx.x * blockDim.x + threadIdx.x;
  const int dy = blockIdx.y * blockDim.y + threadIdx.y;
  // The grid is rounded up to whole blocks, so tail threads may lie outside
  // the image.
  if (dx >= dst->w || dy >= dst->h) return;

  RGB2RGBCvtFunc(dst_idx, dx, dy, src, mparams + dst_idx, dst, swap);
}

/**
 * GPU (kGPU) specialization of PixelConvert: launches RGB2RGBKernel to
 * convert one source image into dst_N destination images.
 */
template <typename T1, typename T2, int sPixFmt, int dPixFmt>
struct PixelConvert<kGPU, T1, T2, sPixFmt, dPixFmt> {
  /**
   * Enqueue the conversion kernel on `stream`.
   *
   * The grid covers a dst_w x dst_h pixel area with 32x16-thread blocks
   * (rounded up), and uses one grid z-slice per destination image. The call
   * does not synchronize the stream itself; whatever checking
   * CUDA_POST_KERNEL_CHECK performs runs right after the launch.
   *
   * @param stream  stream handle, converted to cudaStream_t for the launch
   * @param src     source image descriptor, passed through to the kernel
   * @param dst_N   number of destination images (grid z extent)
   * @param dst_h   height in pixels used to size the grid
   * @param dst_w   width in pixels used to size the grid
   * @param mparams per-destination coordinate mapping parameters
   * @param dsts    array of dst_N destination descriptors
   */
  inline static void Map(TStream stream, const PixelDescr<T1>* src, int dst_N,
                         int dst_h, int dst_w, const CoordMapParam* mparams,
                         PixelDescr<T2> const** dsts) {
    // Nothing to convert. Launching anyway would be an error: a zero grid
    // dimension is an invalid launch configuration, and a negative extent
    // would wrap to a huge value in the unsigned ceil-division below.
    if (dst_N <= 0 || dst_h <= 0 || dst_w <= 0) return;

    const dim3 block(32, 16, 1);
    // Ceil-divide so partially filled blocks at the right/bottom edge are
    // still launched; the kernel bounds-checks the tail threads.
    const dim3 grid((dst_w + block.x - 1) / block.x,
                    (dst_h + block.y - 1) / block.y, dst_N);

    RGB2RGBKernel<T1, T2, sPixFmt, dPixFmt>
        <<<grid, block, 0, cudaStream_t(stream)>>>(
            src, dst_N, mparams, dsts, isswap(sPixFmt, dPixFmt));

    CUDA_POST_KERNEL_CHECK(RGB2RGBKernel);
  }
};

// GPU conversion registrations, one RegisterRGBCvtFunc(device, source format,
// destination format) line per supported pair.
// NOTE(review): RegisterRGBCvtFunc is defined outside this file; presumably
// it instantiates/registers PixelConvert<kGPU, ...> for the pair -- confirm
// against its definition.
//
// Pattern used by every group below: a source format without alpha (BGR,
// RGB, BGRPlanar, RGBPlanar) is registered against all eight destination
// formats, while a source format with alpha (BGRA, RGBA, BGRAPlanar,
// RGBAPlanar) is registered only against the four alpha destination formats.

// BGR and BGRA source images
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_BGR);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_RGB);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_BGRPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_RGBPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGR, VF_PIX_FMT_RGBAPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_RGBAPlanar);

// RGB and RGBA source images
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_BGR);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_RGB);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_BGRPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_RGBPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGB, VF_PIX_FMT_RGBAPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_RGBAPlanar);

// BGRPlanar and BGRAPlanar source images
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_BGR);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_RGB);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_BGRPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_RGBPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_RGBAPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_RGBAPlanar);

// RGBPlanar and RGBAPlanar source images
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_BGR);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_RGB);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_BGRPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_RGBPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_RGBAPlanar);

RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_BGRA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_RGBA);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_BGRAPlanar);
RegisterRGBCvtFunc(kGPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_RGBAPlanar);

}  // namespace vf
