#include <iostream>
#include "util/cuda_system.h"
#include "util/matrices.h"
#include "nmf/multi_dense_mu_nmf1.h"

int main() {
    // If I set the environment variables CUDA_VISIBLE_DEVICES=1,3, the devices will have
    // indexes 0, 1.
    CudaSystem::printDevicesInfo();


    // ncclComm_t comms[1];

    // //managing 4 devices
    // int nDev = 1;
    // int size = 32*1024*1024;
    // int devs[1] = { 0 };

    // NCCL_CALL(ncclCommInitAll(comms, nDev, devs));




    MeMatrix F1 = random_matrix(640, 16);
    MeMatrix F2 = random_matrix(16, 1280);

    MeMatrix A = MeMatrix::multiply(F1, F2);
    // MeMatrix A = random_matrix(64, 128);

    std::cout << "Made matrix, starting NMF.\n";

    long rank = 16;
    long seed = 4319043202;
    float eps = 1e-9;
    // float eps = 1e-6;
    int max_iters = 10000;
    // int max_iters = 8;

    MuNmf nmf(&A, rank, max_iters, eps, seed);
    nmf.run();

    return 0;
}
