

#include <stdint.h>

#include <R.h>
#include "rf.h"

void simpleLinReg(int nsample, double *x, double *y, double *coef,
                  double *mse, int *hasPred);


/*
 Train a regression random forest and store the grown trees in the
 caller-supplied node arrays.

 Input:
   x          predictors; x[m + k * mdim] is variable m of case k
   y          response, one value per case
   xdim       number of variables (mdim) -- see NOTE(review) in the body
   sampsize   number of cases (nsample) -- see NOTE(review) in the body
   nthsize, nrnodes, mtry, cat
              forwarded unchanged to regTree
   nTree      number of trees to grow
   maxcat     dereferenced and forwarded to predictRegTree
   useweights, weights, keepf, replace
              accepted but not read by this function

 Output:
   treeSize   regTree receives treeSize + j; treeSize[j] is then read back
              as the number of nodes of tree j
   nodestatus, lDaughter, rDaughter, avnode, mbest, upper
              node arrays of all trees stored back to back: tree j starts
              at offset treeSize[0] + ... + treeSize[j - 1]
   ypred      mean over all trees of the per-tree predictions for the
              cases in x (length nsample); left untouched when nTree is 0

 NOTE(review): regTree is handed nrnodes for every tree, so the node arrays
 presumably need nrnodes free entries beyond the offset of the last tree --
 confirm against the caller's allocation.
 */
void regRF(double *x, double *y,int *useweights, double *weights, int *xdim, int *sampsize,
           int nthsize, int nrnodes, int nTree, int *mtry,
           int *cat, int *maxcat, int *treeSize, int *nodestatus,
           int *lDaughter, int *rDaughter, double *avnode, int *mbest,
           double *upper, int *keepf, int *replace, double *ypred) {
  double *yb, *xb, *ytree;
  int *varUsed, *sampledIndices;
  int k, m, n, j;
  int idx = 0;  /* offset of the current tree inside the node arrays */
  int ts = 0;   /* node count of the previously grown tree */
  int nsample, mdim;

  /* NOTE(review): sampsize and xdim are declared as pointers, but this
   * function has always consumed their *values* as the counts (the pointer
   * used to be assigned straight to an int, which newer compilers reject).
   * The casts keep that behaviour and make the conversion explicit.
   * Confirm with the caller whether the counts really are passed by value;
   * if so the prototype should say int, otherwise these must become
   * *sampsize and *xdim. */
  nsample = (int) (intptr_t) sampsize;
  mdim = (int) (intptr_t) xdim;

  /* Scratch buffers for one resampled training set and one tree. */
  yb = (double *) S_alloc(nsample, sizeof(double));
  xb = (double *) S_alloc(mdim * nsample, sizeof(double));
  varUsed = (int *) S_alloc(mdim, sizeof(int));
  ytree = (double *) S_alloc(nsample, sizeof(double));
  zeroDouble(ytree, nsample);

  /* Allocated with S_alloc like the buffers above. It used to come from
   * Calloc without a matching Free, which leaked on every call. */
  sampledIndices = (int *) S_alloc(nsample, sizeof(int));
  zeroInt(sampledIndices, nsample);

  GetRNGstate();
  /*************************************
   * Start the loop over trees.
   *************************************/
  for (j = 0; j < nTree; ++j) {
    /* Trees are packed: skip past the nodes of the previous tree. */
    idx += ts;
    zeroInt(varUsed, mdim);

    /* Draw the cases for this tree and copy them into xb / yb. */
    sampleWithReplacement(nsample, nsample, sampledIndices);
    for (n = 0; n < nsample; ++n) {
      k = sampledIndices[n];
      yb[n] = y[k];
      for (m = 0; m < mdim; ++m) {
        xb[m + n * mdim] = x[m + k * mdim];
      }
    }

    /* Grow the regression tree; mtry is forwarded exactly as received. */
    regTree(xb, yb, mdim, nsample, lDaughter + idx, rDaughter + idx,
            upper + idx, avnode + idx, nodestatus + idx, nrnodes,
            treeSize + j, nthsize, mtry, mbest + idx, cat,
            varUsed);
    ts = treeSize[j];

    /* Predict the original (not resampled) cases with the new tree. */
    predictRegTree(x, nsample, mdim, lDaughter + idx,
                   rDaughter + idx, nodestatus + idx, ytree,
                   upper + idx, avnode + idx,
                   mbest + idx, treeSize[j], cat, *maxcat);

    /* ypred is the running mean of the predictions of all trees so far.
     * The first tree assigns directly so that whatever the caller left in
     * ypred (possibly NaN or uninitialised) cannot leak into the result. */
    if (j == 0) {
      for (n = 0; n < nsample; ++n) {
        ypred[n] = ytree[n];
      }
    } else {
      for (n = 0; n < nsample; ++n) {
        ypred[n] = (j * ypred[n] + ytree[n]) / (j + 1);
      }
    }
  }
  PutRNGstate();
  /* end of tree iterations=======================================*/
}

/*--
 Predict with a trained regression forest.

 Every tree is evaluated on x with predictRegTree and its predictions are
 added to ypred; after the last tree ypred is divided by ntree.

   x          cases to predict, forwarded to predictRegTree
   ypred      output, one value per case. It is accumulated into, not
              overwritten, so it must hold zeros on entry for the result
              to be the forest mean.
   mdim, n, maxcat
              number of variables, number of cases and the category limit
              -- see NOTE(review) in the body
   ntree      number of trees; the final division is not guarded, so
              ntree must be positive
   lDaughter, rDaughter, nodestatus, xsplit, avnodes, mbest
              node arrays of all trees, tree i starting at offset
              nrnodes[0] + ... + nrnodes[i - 1]
   nrnodes    per-tree stride between consecutive trees in the node arrays
              (NOTE(review): regRF packs trees by treeSize, so presumably
              the caller passes the tree sizes here -- confirm)
   treeSize   treeSize[i] is forwarded to predictRegTree for tree i
   cat        forwarded to predictRegTree
   nodes      accepted for interface compatibility; not used
 --------------------------------------------------------------------*/
void regForest(double *x, double *ypred, int *mdim, int *n,
               int ntree, int *lDaughter, int *rDaughter,
               int *nodestatus, int *nrnodes, double *xsplit,
               double *avnodes, int *mbest, int *treeSize, int *cat,
               int *maxcat, int nodes) {
  int i, j;
  int offset = 0;  /* start of the current tree inside the node arrays */
  double *ytree;

  /* NOTE(review): n, mdim and maxcat are declared as pointers, but this
   * function has always used their *values* as integers (loop bound,
   * allocation size, and the predictRegTree arguments for which regRF
   * passes plain ints). They are converted once, explicitly, instead of
   * relying on implicit pointer/integer conversions. Confirm with the
   * caller that these really are passed by value. */
  const int nObs = (int) (intptr_t) n;
  const int nVar = (int) (intptr_t) mdim;
  const int maxCat = (int) (intptr_t) maxcat;

  (void) nodes;

  /* One scratch vector reused by every tree. It used to be re-allocated
   * with S_alloc inside the loop, growing transient memory per tree. */
  ytree = (double *) S_alloc(nObs, sizeof(double));

  for (i = 0; i < ntree; ++i) {
    zeroDouble(ytree, nObs);

    predictRegTree(x, nObs, nVar, lDaughter + offset, rDaughter + offset,
                   nodestatus + offset, ytree, xsplit + offset,
                   avnodes + offset, mbest + offset, treeSize[i], cat,
                   maxCat);

    for (j = 0; j < nObs; ++j) ypred[j] += ytree[j];

    offset += nrnodes[i]; /* advance to the next tree */
  }

  for (j = 0; j < nObs; ++j) ypred[j] /= ntree;
}

/* Least-squares fit of y = coef[0] + coef[1] * x over the cases i in
 [0, nsample) for which hasPred[i] is non-zero.

 On return coef[0] holds the intercept, coef[1] the slope and *mse the
 mean squared residual over the selected cases. x, y and hasPred are only
 read. The divisions are not guarded: with no selected case, or with all
 selected x equal, the results are whatever floating-point division by
 zero yields. */
void simpleLinReg(int nsample, double *x, double *y, double *coef,
                  double *mse, int *hasPred) {
  int idx;
  int used = 0;
  double sumX = 0.0, sumY = 0.0;
  double ssXX = 0.0, ssXY = 0.0;
  double sse = 0.0;

  /* Pass 1: count the selected cases and total their coordinates. */
  for (idx = 0; idx < nsample; ++idx) {
    if (!hasPred[idx]) continue;
    ++used;
    sumX += x[idx];
    sumY += y[idx];
  }
  const double meanX = sumX / used;
  const double meanY = sumY / used;

  /* Pass 2: centred sums of squares and cross-products. */
  for (idx = 0; idx < nsample; ++idx) {
    if (!hasPred[idx]) continue;
    const double devX = x[idx] - meanX;
    const double devY = y[idx] - meanY;
    ssXX += devX * devX;
    ssXY += devX * devY;
  }
  const double slope = ssXY / ssXX;
  const double intercept = meanY - slope * meanX;
  coef[1] = slope;
  coef[0] = intercept;

  /* Pass 3: mean squared residual of the fitted line. */
  for (idx = 0; idx < nsample; ++idx) {
    if (!hasPred[idx]) continue;
    const double fitted = intercept + slope * x[idx];
    const double resid = y[idx] - fitted;
    sse += resid * resid;
  }
  *mse = sse / used;
}
