#include "./hadamard_product.h"

namespace npeff {
namespace gpu {
namespace ops {
namespace custom {


// Element-wise (Hadamard) product kernel: out[i] = left[i] * right[i].
//
// Parameters:
//   n     - element count handed to INDEX_STRIDE_1D as the loop bound.
//   left  - first factor array, read at index i.
//   right - second factor array, read at index i.
//   out   - destination array, written at index i.
//
// NOTE(review): INDEX_STRIDE_1D is defined outside this file; presumably it
// expands to a strided loop over [0, n) with index variable `i` -- confirm
// against its definition.
__global__
void HadamardProduct_Kernel(int64_t n, float* left, float* right, float* out) {
    INDEX_STRIDE_1D(n, i) {
        // Load both operands before the store so each is read exactly once.
        const float lhs = left[i];
        const float rhs = right[i];
        out[i] = lhs * rhs;
    }
}


// Enqueues the element-wise product kernel on ctx.stream.
//
// Selects the context's device, computes a ceil-divided grid of
// `block_size`-thread blocks covering `n_elements`, and launches
// HadamardProduct_Kernel. No synchronization is performed here, so the
// caller must synchronize on ctx.stream before reading `out` on the host.
//
// An empty problem (n_elements == 0) is a no-op: launching a grid with zero
// blocks is an invalid configuration and would leave a CUDA error pending.
void HadamardProduct::call_async() {
    ctx.set_device();

    // Ceil-division so a trailing partial block is still covered.
    long n_blocks = (n_elements + block_size - 1) / block_size;

    // Nothing to compute; a zero (or wrapped negative) grid dimension would
    // make the launch fail with an invalid-configuration error.
    if (n_blocks <= 0) {
        return;
    }

    // NOTE(review): launch errors are not checked here, and n_blocks is not
    // clamped to the device's maximum grid x-dimension -- confirm the
    // project's error-handling convention and expected problem sizes.
    HadamardProduct_Kernel<<<static_cast<unsigned int>(n_blocks), block_size, 0, ctx.stream>>>(
        n_elements, (float*) left.data, (float*) right.data, (float*) out.data
    );
}


}  // custom
}  // ops
}  // gpu
}  // npeff
