/**
 * Copyright (c) 2020 xxx Inc.
 * File              : image-preprocess.cc
 * Author            : 
 * Date              : 2020-05-07
 * Last Modified Date: 2020-05-07
 * Last Modified By  : 
 */
#include "image-preprocess.h"

#include <cxxutil/logging.h>
#include <cxxutil/strutil.h>

using namespace cxxutil;
using namespace std;

namespace vf {
namespace dl {

/**
 * kCopy pre-processing on the CPU: copies the input to the output unchanged.
 *
 * The number of floats copied is N multiplied by the product of every entry
 * in `shape`.
 *
 * @param stream Unused; present to match the common Map signature.
 * @param N      Multiplier applied to the per-item element count.
 * @param shape  Dimensions of one item; all entries are multiplied together.
 * @param src    Source buffer, read for N * prod(shape) floats.
 * @param dst    Destination buffer, written for N * prod(shape) floats.
 * @param scale  Unused.
 * @param mean   Unused.
 * @param std    Unused.
 */
template <>
struct PreProcessFunc<kCPU, VF_PIX_FMT_None, kCopy> {
  inline static void Map(TStream stream, int N, const std::vector<int>& shape,
                         const float* src, float* dst, float scale,
                         const float* mean, const float* std) {
    // Accumulate in size_t rather than int: the int product N * prod(shape)
    // overflows (undefined behaviour) once a buffer reaches 2^31 elements,
    // which happened before the value was widened for sizeof(float).
    size_t elem_count = static_cast<size_t>(N);
    for (const int dim : shape) {
      elem_count *= static_cast<size_t>(dim);
    }
    memcpy(dst, src, elem_count * sizeof(float));
  }
};

// Register the CPU kCopy pre-process function once per pixel format listed
// below. Every entry passes VF_PIX_FMT_None as the third argument, which is
// the format the kCopy specialization in this file is declared for.
// NOTE(review): the roles of the second and third macro arguments (format
// being registered vs. format of the implementing specialization) are
// inferred from how they are used here -- confirm against the definition of
// RegisterPreProcessFunc.
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_1D, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_2D, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_3D, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_4D, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_GRAY, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGR, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGB, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_YUV420P, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_NV12, VF_PIX_FMT_None, kCopy);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_NV21, VF_PIX_FMT_None, kCopy);

/**
 * Runs a per-pixel kernel over every pixel of every image in a batch.
 *
 * For each image index in [0, N) and each pixel index in [0, HW), invokes
 *   KFunc::Map(image, pixel, C, HW, src, dst, scale, mean, std).
 * The pixel range is walked in chunks so that, when USE_OpenMP is enabled,
 * the chunk loop can be handed to an OpenMP parallel-for.
 *
 * @tparam KFunc Kernel type providing a static Map with the call shape above.
 * @param stream Unused by this function.
 * @param N      Number of images.
 * @param C      Forwarded to the kernel unchanged.
 * @param HW     Number of pixel indices visited per image.
 * @param src, dst, scale, mean, std  Forwarded to the kernel unchanged.
 */
template <typename KFunc>
inline void PreProcessFuncT(TStream stream, int N, int C, int HW,
                            const float* src, float* dst, float scale,
                            const float* mean, const float* std) {
  // The thread count is pinned to one; the configurable form is kept for
  // reference:
  // const int omp_threads = 0 == omp_num_threads ? 1 : omp_num_threads;
  const int omp_threads = 1;
  // Pixels per chunk: ceiling of HW / omp_threads. It does not depend on the
  // image index, so it is computed once up front.
  const int chunk = (HW + omp_threads - 1) / omp_threads;
  for (int image = 0; image < N; ++image) {
#if USE_OpenMP
#pragma omp parallel for num_threads(omp_threads)
#endif
    for (int first = 0; first < HW; first += chunk) {
      // Clamp the final chunk so it never runs past the last pixel.
      const int last = (first + chunk > HW) ? HW : first + chunk;
      for (int pixel = first; pixel < last; ++pixel) {
        KFunc::Map(image, pixel, C, HW, src, dst, scale, mean, std);
      }
    }
  }
}

/**
 * kScaleMeanStd pre-processing on the CPU for VF_PIX_FMT_BGR.
 *
 * `shape` is checked to have exactly three entries with shape[2] == 3.
 * shape[2] is then forwarded as the channel count and shape[0] * shape[1] as
 * the per-image pixel count to PreProcessFuncT, which applies
 * ScaleMeanStdHWCKernel to every pixel.
 * NOTE(review): the arithmetic itself lives in ScaleMeanStdHWCKernel, which is
 * defined outside this file section.
 */
template <>
struct PreProcessFunc<kCPU, VF_PIX_FMT_BGR, kScaleMeanStd> {
  inline static void Map(TStream stream, int N, const std::vector<int>& shape,
                         const float* src, float* dst, float scale,
                         const float* mean, const float* std) {
    CHECK(shape.size() == 3 && shape[2] == 3)
        << "shape must be 3 dimensional and the last must be 3, but got "
        << to_string(shape);
    // Channel count comes from the last entry; the first two are multiplied
    // into the pixel count.
    const int channels = shape[2];
    const int pixels_per_image = shape[0] * shape[1];
    PreProcessFuncT<ScaleMeanStdHWCKernel>(stream, N, channels,
                                           pixels_per_image, src, dst, scale,
                                           mean, std);
  }
};
// Both BGR and RGB are registered with VF_PIX_FMT_BGR as the third argument,
// i.e. they name the same kScaleMeanStd specialization.
// NOTE(review): presumably mean/std are supplied in the caller's own channel
// order so no channel swap is needed -- confirm against the macro and kernel.
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGR, VF_PIX_FMT_BGR, kScaleMeanStd);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGB, VF_PIX_FMT_BGR, kScaleMeanStd);

/**
 * kScaleMeanStd pre-processing on the CPU for VF_PIX_FMT_BGRA.
 *
 * `shape` is checked to have exactly three entries with shape[2] == 4.
 * shape[2] is then forwarded as the channel count and shape[0] * shape[1] as
 * the per-image pixel count to PreProcessFuncT, which applies
 * ScaleMeanStdHWCKernel to every pixel.
 * NOTE(review): the arithmetic itself lives in ScaleMeanStdHWCKernel, which is
 * defined outside this file section.
 */
template <>
struct PreProcessFunc<kCPU, VF_PIX_FMT_BGRA, kScaleMeanStd> {
  inline static void Map(TStream stream, int N, const std::vector<int>& shape,
                         const float* src, float* dst, float scale,
                         const float* mean, const float* std) {
    CHECK(shape.size() == 3 && shape[2] == 4)
        << "shape must be 3 dimensional and the last must be 4, but got "
        << to_string(shape);
    // Channel count comes from the last entry; the first two are multiplied
    // into the pixel count.
    const int channels = shape[2];
    const int pixels_per_image = shape[0] * shape[1];
    PreProcessFuncT<ScaleMeanStdHWCKernel>(stream, N, channels,
                                           pixels_per_image, src, dst, scale,
                                           mean, std);
  }
};
// Both BGRA and RGBA are registered with VF_PIX_FMT_BGRA as the third
// argument, i.e. they name the same kScaleMeanStd specialization.
// NOTE(review): presumably mean/std are supplied in the caller's own channel
// order so no channel swap is needed -- confirm against the macro and kernel.
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRA, VF_PIX_FMT_BGRA, kScaleMeanStd);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBA, VF_PIX_FMT_BGRA, kScaleMeanStd);

/**
 * kScaleMeanStd pre-processing on the CPU for VF_PIX_FMT_BGRPlanar.
 *
 * `shape` is checked to have exactly three entries with shape[0] == 3.
 * shape[0] is then forwarded as the channel count and shape[1] * shape[2] as
 * the per-image pixel count to PreProcessFuncT, which applies
 * ScaleMeanStdCHWKernel to every pixel.
 * NOTE(review): the arithmetic itself lives in ScaleMeanStdCHWKernel, which is
 * defined outside this file section.
 */
template <>
struct PreProcessFunc<kCPU, VF_PIX_FMT_BGRPlanar, kScaleMeanStd> {
  inline static void Map(TStream stream, int N, const std::vector<int>& shape,
                         const float* src, float* dst, float scale,
                         const float* mean, const float* std) {
    CHECK(shape.size() == 3 && shape[0] == 3)
        << "shape must be 3 dimensional and the first must be 3, but got "
        << to_string(shape);
    // Channel count comes from the first entry; the remaining two are
    // multiplied into the pixel count.
    const int channels = shape[0];
    const int pixels_per_plane = shape[1] * shape[2];
    PreProcessFuncT<ScaleMeanStdCHWKernel>(stream, N, channels,
                                           pixels_per_plane, src, dst, scale,
                                           mean, std);
  }
};
// Both BGRPlanar and RGBPlanar are registered with VF_PIX_FMT_BGRPlanar as the
// third argument, i.e. they name the same kScaleMeanStd specialization.
// NOTE(review): presumably mean/std are supplied in the caller's own channel
// order so no channel swap is needed -- confirm against the macro and kernel.
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRPlanar, VF_PIX_FMT_BGRPlanar,
                       kScaleMeanStd);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBPlanar, VF_PIX_FMT_BGRPlanar,
                       kScaleMeanStd);

/**
 * kScaleMeanStd pre-processing on the CPU for VF_PIX_FMT_BGRAPlanar.
 *
 * `shape` is checked to have exactly three entries with shape[0] == 4.
 * shape[0] is then forwarded as the channel count and shape[1] * shape[2] as
 * the per-image pixel count to PreProcessFuncT, which applies
 * ScaleMeanStdCHWKernel to every pixel.
 * NOTE(review): the arithmetic itself lives in ScaleMeanStdCHWKernel, which is
 * defined outside this file section.
 */
template <>
struct PreProcessFunc<kCPU, VF_PIX_FMT_BGRAPlanar, kScaleMeanStd> {
  inline static void Map(TStream stream, int N, const std::vector<int>& shape,
                         const float* src, float* dst, float scale,
                         const float* mean, const float* std) {
    CHECK(shape.size() == 3 && shape[0] == 4)
        << "shape must be 3 dimensional and the first must be 4, but got "
        << to_string(shape);
    // Channel count comes from the first entry; the remaining two are
    // multiplied into the pixel count.
    const int channels = shape[0];
    const int pixels_per_plane = shape[1] * shape[2];
    PreProcessFuncT<ScaleMeanStdCHWKernel>(stream, N, channels,
                                           pixels_per_plane, src, dst, scale,
                                           mean, std);
  }
};
// Both BGRAPlanar and RGBAPlanar are registered with VF_PIX_FMT_BGRAPlanar as
// the third argument, i.e. they name the same kScaleMeanStd specialization.
// NOTE(review): presumably mean/std are supplied in the caller's own channel
// order so no channel swap is needed -- confirm against the macro and kernel.
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_BGRAPlanar, VF_PIX_FMT_BGRAPlanar,
                       kScaleMeanStd);
RegisterPreProcessFunc(kCPU, VF_PIX_FMT_RGBAPlanar, VF_PIX_FMT_BGRAPlanar,
                       kScaleMeanStd);

}  // namespace dl
}  // namespace vf
