#pragma once
// Multiplies a square matrix by a constant and adds the identity matrix.


#include <cstdint>

#include <cuda_runtime.h>

#include <gpu/contexts/device_context.h>
#include <gpu/containers/dense_matrix.h>


namespace npeff {
namespace gpu {
namespace ops {
namespace custom {


// Operation object for multiplying a square matrix by a constant and adding
// the identity matrix. An instance only records its operands; the work is
// started by call_async(), whose definition lives outside this header.
//
// The context and matrix are held by reference, so both must outlive this
// object. Judging by the class name, the result is presumably written back
// into `mat` -- confirm against the definition of call_async().
class MultiplyAndAddIdentity_InPlace {
private:
    // Operands, stored as references (no copy is made, nothing is owned).
    DeviceContext& ctx;
    DenseMatrix& mat;

    // Constant the matrix is multiplied by.
    const float multiply_factor;

    // TODO: Figure out how to set this.
    // NOTE(review): presumably the threads-per-block used for the launch in
    // call_async() -- verify against its definition.
    const int64_t block_size{256};

public:
    // Binds the operation to `ctx` and `mat` and records `multiply_factor`.
    //
    // The matrix is required to be square: the row and column counts are
    // compared via THROW_IF_FALSE (defined elsewhere; presumably raises when
    // the condition does not hold).
    MultiplyAndAddIdentity_InPlace(DeviceContext& ctx, DenseMatrix& mat, float multiply_factor)
        : ctx(ctx)
        , mat(mat)
        , multiply_factor(multiply_factor)
    {
        THROW_IF_FALSE(mat.n_rows == mat.n_cols);
    }

    // Starts the operation. Declared here only; going by the name it is
    // expected to return without waiting for completion -- TODO confirm.
    void call_async();
};


}  // custom
}  // ops
}  // gpu
}  // npeff
