---
title: "time_efficiency"
author: "Tian Qin"
date: "2024-03-25"
output: html_document
---



### Library
```{r}
library(foreign)                # read.arff(): loads the ARFF data sets used below
library(RiemannLebesgueForest)  # RLForest(): Riemann-Lebesgue forest regression
library(randomForest)           # randomForest(): baseline random forest
library(MASS)                   # NOTE(review): no MASS function appears in this file -- confirm it is needed
library(caret)                  # createFolds() / createDataPartition() for CV splits


```



### Forestfire

```{r}

# Start this section with a clean workspace.
rm(list = ls())
gc()

# Forest Fires data; `area` is the regression target.
forestfires <- read.arff('forest_fire.arff')

# Shuffle the rows, then assign each row to one of 5 CV folds
# (createFolds stratifies on the target).
set.seed(42)
df <- forestfires[sample(nrow(forestfires)), ]
folds <- createFolds(df$area, k = 5, list = FALSE)

```
```{r,warning=FALSE}

# 5-fold CV on the forest-fire data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fixes:
#  - time_rf / time_rlf were declared but never filled, although the document
#    is titled "time_efficiency"; they now record the final-fit elapsed times.
#  - the per-fold cleanup called rm(xtrain_valid) twice and never removed
#    ytrain_valid.
mse_rf   <- c()
mse_rlf  <- c()
time_rf  <- c()
time_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -13]   # column 13 is the target (area)
  ytest  <- df[testIndexes, 13]
  xtrain <- df[-testIndexes, -13]
  ytrain <- df[-testIndexes, 13]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  t_rf <- system.time(
    rf <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = best_rf$n.trees,
      nodesize = best_rf$min.node.size
    )
  )
  time_rf[k] <- t_rf[["elapsed"]]
  mse_rf[k]  <- mean((predict(rf, newdata = xtest) - ytest)^2)

  t_rlf <- system.time(
    rlf <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      Lp       = best_Lp,
      replace  = FALSE,
      verbose  = FALSE
    )
  )
  time_rlf[k] <- t_rlf[["elapsed"]]
  mse_rlf[k]  <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index, t_rf, t_rlf)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
cat('\n mean final-fit time of RF (s):', mean(time_rf))
cat('\n mean final-fit time of RLF (s):', mean(time_rlf))

rlf_forestfiresttune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_forestfiresttune, 'rlf_forestfiresttune.csv')
```







### Studentperformance

```{r}
#clean up memory
rm(list=ls())
gc()
# Student performance data; G3 (final grade) is the regression target.
Studentperformance<- read.arff('Studentperformance.arff')


 set.seed(42)
df<-Studentperformance[sample(nrow(Studentperformance)),]
# Stratified 5-fold assignment on the target.
folds <- createFolds(df$G3, k = 5,list = FALSE)
```
```{r,warning=FALSE}



# 5-fold CV on the student-performance data: tune RLForest (over Lp) and
# randomForest (over nodesize x ntree) on an inner 25% validation split,
# then score both on the held-out fold.
#
# Review fixes: mse_lf1 was declared but never used; the per-fold cleanup
# called rm(xtrain_valid) twice and never removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -33]   # column 33 is the target (G3)
  ytest  <- df[testIndexes, 33]
  xtrain <- df[-testIndexes, -33]
  ytrain <- df[-testIndexes, 33]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_Studentperformancetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_Studentperformancetune, 'rlf_Studentperformancetune.csv')
```

### energy

```{r}
#clean up memory
rm(list=ls())
gc()

# Energy-efficiency data; heating_load is the regression target.
energy<-  read.arff("energy.arff")

set.seed(42)
df<-energy[sample(nrow(energy)),-10] #drop column 10 (the cooling-load response), keeping heating_load as the only target

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$heating_load, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the energy data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -9]   # column 9 is the target (heating_load)
  ytest  <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_energytune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_energytune, 'rlf_energytune.csv')
```


### cars

```{r}
#clean up memory
rm(list=ls())
gc()
# Cars data; Price is the regression target.
cars<- read.arff('cars.arff')

 set.seed(42)
df<-cars[sample(nrow(cars)),]
# Stratified 5-fold assignment on the target.
folds <- createFolds(df$Price, k = 5,list = FALSE)
```
```{r,warning=FALSE}


# 5-fold CV on the cars data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fixes: mse_lf1 was declared but never used; the per-fold cleanup
# called rm(xtrain_valid) twice and never removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -1]   # column 1 is the target (Price)
  ytest  <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_carstune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_carstune, 'rlf_carstune.csv')
```


### QSAR (fish toxicity)

```{r}
#clean up memory
rm(list=ls())
gc()

# QSAR fish-toxicity data; LC50 is the regression target.
fish<-  read.arff("fish.arff")

set.seed(42)
df<-fish[sample(nrow(fish)),] #shuffle rows; no column is dropped here (the old "remove cooling load" comment was copy-pasted from the energy section)

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$LC50, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the fish-toxicity data: tune RLForest (over Lp) and
# randomForest (over nodesize x ntree) on an inner 25% validation split,
# then score both on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -7]   # column 7 is the target (LC50)
  ytest  <- df[testIndexes, 7]
  xtrain <- df[-testIndexes, -7]
  ytrain <- df[-testIndexes, 7]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_fishtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_fishtune, 'rlf_fishtune.csv')
```

### concrete

```{r}
#clean up memory
rm(list=ls())
gc()

# Concrete compressive-strength data; strength is the regression target.
concrete<-  read.arff("concrete.arff")

set.seed(42)
df<-concrete[sample(nrow(concrete)),] #shuffle rows; no column is dropped here (the old "remove longitude" comment was copy-pasted from another section)

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$strength, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the concrete data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -9]   # column 9 is the target (strength)
  ytest  <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_concretetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_concretetune, 'rlf_concretetune.csv')
```



### socmob

```{r}
#clean up memory
rm(list=ls())
gc()

# Social-mobility data; counts_for_sons_current_occupation is the target.
socmob<-  read.arff("socmob.arff")

set.seed(42)
df<-socmob[sample(nrow(socmob)),] #shuffle rows; no column is dropped here (the old "remove longitude" comment was copy-pasted from another section)

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$counts_for_sons_current_occupation, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the socmob data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -6]   # column 6 is the target (counts_for_sons_current_occupation)
  ytest  <- df[testIndexes, 6]
  xtrain <- df[-testIndexes, -6]
  ytrain <- df[-testIndexes, 6]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_socmobtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_socmobtune, 'rlf_socmobtune.csv')
```


### geographical_origin_of_music

```{r}
#clean up memory
rm(list=ls())
gc()

# Geographical origin of music data; latitude is the regression target.
geomusic<-  read.arff("geomusic.arff")

set.seed(42)
df<-geomusic[sample(nrow(geomusic)),-118] #drop column 118 (presumably longitude, the other coordinate response) -- TODO confirm against the ARFF header

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$latitude, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the geomusic data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -117]   # column 117 is the target (latitude)
  ytest  <- df[testIndexes, 117]
  xtrain <- df[-testIndexes, -117]
  ytrain <- df[-testIndexes, 117]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_geomusictune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_geomusictune, 'rlf_geomusictune.csv')
```

### solar

```{r}
#clean up memory
rm(list=ls())
gc()

# Solar-flare data; c_class_flares is the regression target.
solar_flare<-  read.arff("solar_flare.arff")

set.seed(42)
df<-solar_flare[sample(nrow(solar_flare)),-c(12,13)] #drop columns 12-13 (presumably the other flare-count responses; the old "remove longitude" comment was copy-pasted) -- TODO confirm against the ARFF header

# Stratified 5-fold assignment on the target.
folds <- createFolds(df$c_class_flares, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV on the solar-flare data: tune RLForest (over Lp) and randomForest
# (over nodesize x ntree) on an inner 25% validation split, then score both
# on the held-out fold.
#
# Review fix: the per-fold cleanup called rm(xtrain_valid) twice and never
# removed ytrain_valid.
mse_rf  <- c()
mse_rlf <- c()

for (k in 1:5) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -11]   # column 11 is the target (c_class_flares)
  ytest  <- df[testIndexes, 11]
  xtrain <- df[-testIndexes, -11]
  ytrain <- df[-testIndexes, 11]

  # Hold out 25% of this fold's training data for hyper-parameter selection.
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- RLF: grid search over the Lp exponent ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    hyper_grid_rlf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  # which.min() returns the first minimum, matching order(...)[1] on ties.
  best_Lp <- hyper_grid_rlf$Lp[which.min(hyper_grid_rlf$mse)]

  # --- RF: full cartesian grid over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i]
    )
    hyper_grid_rf$mse[i] <-
      mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  best_rf <- hyper_grid_rf[which.min(hyper_grid_rf$mse), ]

  # --- refit with the selected hyper-parameters; score the held-out fold ---
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = best_rf$n.trees,
    nodesize = best_rf$min.node.size
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = best_Lp,
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release per-fold objects before the next iteration.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid,
     validation_index)
  gc()
}

# Mean CV error and a 95% t-based margin of error (df = 5 - 1 = 4).
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_solartune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_solartune, 'rlf_solartune.csv')
```


### airfoil

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

airfoil<-  read.arff("airfoil.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-airfoil[sample(nrow(airfoil)),] # shuffled copy; no columns dropped (old "#remove cooling load" comment was a copy-paste leftover)

folds <- createFolds(df$sound_pressure, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on airfoil.
# Response is column 6 of df; within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -6]
  ytest  <- df[testIndexes, 6]
  xtrain <- df[-testIndexes, -6]
  ytrain <- df[-testIndexes, 6]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_airfoiltune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_airfoiltune, 'rlf_airfoiltune.csv')


```


### redwine

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

redwine<-  read.arff("redwine.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-redwine[sample(nrow(redwine)),] # shuffled copy; no columns dropped (old "#remove cooling load" comment was a copy-paste leftover)

folds <- createFolds(df$quality, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on redwine.
# Response is column 12 of df; within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -12]
  ytest  <- df[testIndexes, 12]
  xtrain <- df[-testIndexes, -12]
  ytrain <- df[-testIndexes, 12]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_redwinetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_redwinetune, 'rlf_redwinetune.csv')


```


### auction

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

auction<-  read.arff("auction.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-auction[sample(nrow(auction)),-8] # drops column 8; NOTE(review): old comment said "remove cooling load" (energy-dataset leftover) — confirm which auction column this removes

folds <- createFolds(df$verification.time, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on auction.
# Response is column 8 of df; within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -8]
  ytest  <- df[testIndexes, 8]
  xtrain <- df[-testIndexes, -8]
  ytrain <- df[-testIndexes, 8]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_auctiontune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_auctiontune, 'rlf_auctiontune.csv')


```


### space_ga

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

space_ga<-  read.arff("space_ga.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-space_ga[sample(nrow(space_ga)),] # shuffled copy; no columns dropped (old "#remove cooling load" comment was a copy-paste leftover)

folds <- createFolds(df$ln_votes_pop, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on space_ga.
# Response is column 1 of df; within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -1]
  ytest  <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_spacetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_spacetune, 'rlf_spacetune.csv')


```



### abalone

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()
abalone <- read.arff("abalone.arff")



# Shuffle rows with a fixed seed so the CV folds are reproducible
 set.seed(42)
df<-abalone[sample(nrow(abalone)),]
# (leftover from the forest-fire chunk, where area was log-transformed)
#df$area <- log(df$area+1)
folds <- createFolds(df$rings, k = 5,list = FALSE)



```

```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on abalone.
# Response is column 9 of df (rings); within each training fold a 25%
# validation split selects hyper-parameters before refitting and scoring.
# (Removed the unused mse_lf vector that was declared but never filled.)
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -9]
  ytest  <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_abalonetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_abalonetune, 'rlf_abalonetune.csv')

```




### whitewine

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

whitewine<-  read.arff("whitewine.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-whitewine[sample(nrow(whitewine)),] 

folds <- createFolds(df$quality, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on whitewine.
# Response is column 12 of df; within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -12]
  ytest  <- df[testIndexes, 12]
  xtrain <- df[-testIndexes, -12]
  ytrain <- df[-testIndexes, 12]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_whitetune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_whitetune, 'rlf_whitetune.csv')


```



### cpu

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

cpu<-  read.arff("cpu.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-cpu[sample(nrow(cpu)),] 

folds <- createFolds(df$usr, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on cpu.
# Response is column 22 of df (usr); within each training fold a 25%
# validation split selects hyper-parameters before refitting and scoring.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -22]
  ytest  <- df[testIndexes, 22]
  xtrain <- df[-testIndexes, -22]
  ytrain <- df[-testIndexes, 22]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_cputune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_cputune, 'rlf_cputune.csv')


```





### Kinematics of Robot Arm

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()
# (leftover from the Ailerons experiment)
#untar("Ailerons/delta_ailerons.tgz",files='delta_ailerons.data')
Kinematics<-  read.arff("kin8nm.arff")#read.table("Kinematics/kin8nm.data",header = FALSE)

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-Kinematics[sample(nrow(Kinematics)),]

folds <- createFolds(df$y, k = 5,list = FALSE)#cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)



```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on kin8nm.
# Response is column 9 of df (y); within each training fold a 25% validation
# split selects hyper-parameters before refitting and scoring on the test fold.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -9]
  ytest  <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_kin8nmtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_kin8nmtune, 'rlf_kin8nmtune.csv')


```

### pumadyn

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

pumadyn<-  read.arff("pumadyn.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-pumadyn[sample(nrow(pumadyn)),] 

folds <- createFolds(df$thetadd6, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on pumadyn.
# Response is column 33 of df (thetadd6); within each training fold a 25%
# validation split selects hyper-parameters before refitting and scoring.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -33]
  ytest  <- df[testIndexes, 33]
  xtrain <- df[-testIndexes, -33]
  ytrain <- df[-testIndexes, 33]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_pumadyntune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_pumadyntune, 'rlf_pumadyntune.csv')


```



### grid

```{r}
# Clear all objects left by the previous dataset's chunk
rm(list=ls())
gc()

grid<-  read.arff("grid.arff")

# Shuffle rows with a fixed seed so the CV folds are reproducible
set.seed(42)
df<-grid[sample(nrow(grid)),-14] # drop column 14 (stabf, the categorical twin of the numeric response stab)

folds <- createFolds(df$stab, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparing tuned randomForest vs tuned RLForest on grid.
# Response is column 13 of df (stab); within each training fold a 25%
# validation split selects hyper-parameters before refitting and scoring.
n_folds <- 5
mse_rf  <- numeric(n_folds)   # preallocate instead of growing with c()
mse_rlf <- numeric(n_folds)

for (k in seq_len(n_folds)) {
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest  <- df[testIndexes, -13]
  ytest  <- df[testIndexes, 13]
  xtrain <- df[-testIndexes, -13]
  ytrain <- df[-testIndexes, 13]

  # hold out 25% of the training fold for hyper-parameter selection
  validation_index <- createDataPartition(ytrain, p = 0.25,
                                          list = FALSE, times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- tune RLForest over Lp ---
  hyper_grid_rlf <- expand.grid(Lp = c(0.4, 0.6, 0.8), mse = NA)
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      ntreesub = 10,
      replace  = FALSE,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # hold-out validation MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- tune randomForest over nodesize x ntree ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees       = c(50, 100, 150, 200),
    mse           = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # refit each model with its best settings and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Free per-fold objects. Bug fix: second rm() was a duplicate
  # rm(xtrain_valid); ytrain_valid was never released and rm() warned.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV error and 95% t-interval half-width across the folds.
# Label fix: this chunk printed "MSE of LF" while every sibling chunk
# prints "MSE of RLF" for the same quantity.
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, n_folds - 1) * sd(mse_rf) / sqrt(n_folds))
cat('\n margin of error RLF:', qt(0.975, n_folds - 1) * sd(mse_rlf) / sqrt(n_folds))
rlf_gridtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_gridtune, 'rlf_gridtune.csv')


```


### Brazil Housing

```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()

# Brazilian housing rent data; response is column `total`
brazilianhousing<-  read.arff("brazilianhousing.arff")

set.seed(42)
# Shuffle rows once so fold membership is random
df<-brazilianhousing[sample(nrow(brazilianhousing)),] 
# Model the log of the response
df$total <- log(df$total)
# 5-fold CV assignment vector: folds[i] gives the fold id of row i
folds <- createFolds(df$total, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 13); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -13]
  ytest <- df[testIndexes, 13]
  xtrain <- df[-testIndexes, -13]
  ytrain <- df[-testIndexes, 13]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_brazilianhousingtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_brazilianhousingtune, 'rlf_brazilianhousingtune.csv')


```




### Naval propulsion plant

```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()

# Naval propulsion plant data; response is the GT compressor decay coefficient
naval<-  read.arff("naval_propulsion.arff")

set.seed(42)
# Shuffle rows once so fold membership is random
df<-naval[sample(nrow(naval)),]

# 5-fold CV assignment vector (no log transform for this response)
folds <- createFolds(df$gt_compressor_decay_state_coefficient, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 15); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -15]
  ytest <- df[testIndexes, 15]
  xtrain <- df[-testIndexes, -15]
  ytrain <- df[-testIndexes, 15]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_navaltune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_navaltune, 'rlf_navaltune.csv')


```


### Miami Housing

```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()

# Miami housing data; response is sale price (SALE_PRC)
miamihousing<-  read.arff("miamihousing.arff")

set.seed(42)
# Shuffle rows once so fold membership is random
df<-miamihousing[sample(nrow(miamihousing)),] 
# Model the log of the sale price
df$SALE_PRC <- log(df$SALE_PRC)
# 5-fold CV assignment vector: folds[i] gives the fold id of row i
folds <- createFolds(df$SALE_PRC, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 4); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -4]
  ytest <- df[testIndexes, 4]
  xtrain <- df[-testIndexes, -4]
  ytrain <- df[-testIndexes, 4]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_miamihousingtune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_miamihousingtune, 'rlf_miamihousingtune.csv')


```



### Fifa

```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()

# FIFA player data; response is wage_eur
fifa<-  read.arff("fifa.arff")

set.seed(42)
# Shuffle rows; drop column 5 (nationality) because randomForest cannot
# handle categorical predictors with more than 53 categories
df<-fifa[sample(nrow(fifa)),-5]
# Model the log of the wage
df$wage_eur <- log(df$wage_eur)
# 5-fold CV assignment vector: folds[i] gives the fold id of row i
folds <- createFolds(df$wage_eur, k = 5,list = FALSE)




```





```{r,warning=FALSE}


# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 1); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -1]
  ytest <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error of RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_fifatune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_fifatune, 'rlf_fifatune.csv')


```


### California housing price
```{r}

# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()
set.seed(42)
# California housing data; response is medianHouseValue.
# NOTE(review): filename is spelled 'calinforniahousing.arff' — presumably it
# matches the file on disk; confirm before correcting the spelling.
calihou <- read.arff('calinforniahousing.arff')

# Shuffle rows once so fold membership is random
df<-calihou[sample(nrow(calihou)),] 

# Model the log of the median house value
df$medianHouseValue <- log(df$medianHouseValue)
# 5-fold CV assignment vector: folds[i] gives the fold id of row i
folds <- createFolds(df$medianHouseValue, k = 5,list = FALSE)

```

```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)

# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
# (Unused mse_lf1 accumulator removed.)
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 9); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -9]
  ytest <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_calihoutune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_calihoutune, 'rlf_calihoutune.csv')

```


### Superconductivity
```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list = ls())
gc()
set.seed(42)
# Superconductivity data; response is critical_temp (no log transform)
superconductivity <- read.arff('superconductivity.arff')

# Shuffle rows once so fold membership is random
df <- superconductivity[sample(nrow(superconductivity)), ]

# BUG FIX: k was 10 here while the CV loop below iterates k in 1:5 (and its
# margin of error uses qt(0.975, 4)/sqrt(5)), so folds 6-10 were never used
# as test sets. Use k = 5 to match every other dataset in this analysis.
folds <- createFolds(df$critical_temp, k = 5, list = FALSE)

```

```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)

# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 82); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -82]
  ytest <- df[testIndexes, 82]
  xtrain <- df[-testIndexes, -82]
  ytrain <- df[-testIndexes, 82]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_superconductivitytune <- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_superconductivitytune, 'rlf_superconductivitytune.csv')



```



### CPS 1988 wage
```{r}
# Clean up memory left over from the previous dataset's chunks
rm(list=ls())
gc()
set.seed(42)
# CPS 1988 wage data; response is wage
cps88wage <- read.arff('cps88wage.arff')

# Shuffle rows; the survey sampling weights are ignored throughout
df<-cps88wage[sample(nrow(cps88wage)),]

# Model the log of the wage
df$wage <- log(df$wage)

# 5-fold CV assignment vector: folds[i] gives the fold id of row i
folds <- createFolds(df$wage, k = 5,list = FALSE)

```

```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)

# 5-fold CV comparison of random forest (RF) vs Riemann-Lebesgue forest (RLF):
# per fold, tune on a 25% validation split of the training pool, refit with
# the best hyperparameters, and record the test-fold MSE.
# (Unused mse_lf1 accumulator removed.)
mse_rf <- numeric(5)
mse_rlf <- numeric(5)
for (k in 1:5) {
  # Fold k is the test set (response is column 1); the rest is training
  testIndexes <- which(folds == k, arr.ind = TRUE)
  xtest <- df[testIndexes, -1]
  ytest <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # Hold out 25% of the training pool for hyperparameter selection
  validation_index <- createDataPartition(ytrain,
                                          p = 0.25,
                                          list = FALSE,
                                          times = 1)
  xtrain_train <- xtrain[-validation_index, ]
  ytrain_train <- ytrain[-validation_index]
  xtrain_valid <- xtrain[validation_index, ]
  ytrain_valid <- ytrain[validation_index]

  # --- Tune RLF over Lp ---
  hyper_grid_rlf <- expand.grid(
    Lp = c(0.4, 0.6, 0.8),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rlf))) {
    fit <- RLForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = 100,
      replace  = FALSE,
      ntreesub = 10,
      Lp       = hyper_grid_rlf$Lp[i],
      verbose  = FALSE
    )
    # Validation-set MSE for this Lp
    hyper_grid_rlf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rlf <- hyper_grid_rlf[order(hyper_grid_rlf$mse), ]

  # --- Tune RF over (nodesize, ntree) via full Cartesian grid search ---
  hyper_grid_rf <- expand.grid(
    min.node.size = c(5, 10, 15),
    n.trees = c(50, 100, 150, 200),
    mse = NA
  )
  for (i in seq_len(nrow(hyper_grid_rf))) {
    fit <- randomForest(
      formula  = ytrain_train ~ .,
      data     = xtrain_train,
      ntree    = hyper_grid_rf$n.trees[i],
      nodesize = hyper_grid_rf$min.node.size[i],
      verbose  = FALSE
    )
    hyper_grid_rf$mse[i] <- mean((predict(fit, newdata = xtrain_valid) - ytrain_valid)^2)
  }
  ordered_hyper_grid_rf <- hyper_grid_rf[order(hyper_grid_rf$mse), ]

  # Refit with the best hyperparameters and score on the held-out fold
  rf <- randomForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = ordered_hyper_grid_rf$n.trees[1],
    nodesize = ordered_hyper_grid_rf$min.node.size[1],
    verbose  = FALSE
  )
  mse_rf[k] <- mean((predict(rf, newdata = xtest) - ytest)^2)

  rlf <- RLForest(
    formula  = ytrain_train ~ .,
    data     = xtrain_train,
    ntree    = 100,
    ntreesub = 10,
    Lp       = ordered_hyper_grid_rlf$Lp[1],
    replace  = FALSE,
    verbose  = FALSE
  )
  mse_rlf[k] <- mean((predict(rlf, newdata = xtest) - ytest)^2)

  # Release this fold's objects before the next iteration.
  # BUG FIX: rm(xtrain_valid) appeared twice and ytrain_valid was never
  # removed, leaking it across folds.
  rm(rf, rlf, xtest, ytest, xtrain, ytrain,
     xtrain_train, ytrain_train, xtrain_valid, ytrain_valid)
  gc()
}

# Mean CV MSE and 95% t-based margin of error (df = 4) across the 5 folds
cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of LF:', mean(mse_rlf))
cat('\n margin of error of RF:', qt(0.975, 4) * sd(mse_rf) / sqrt(5))
cat('\n margin of error RLF:', qt(0.975, 4) * sd(mse_rlf) / sqrt(5))
rlf_cps88wagetune<- data.frame(lf = mse_rlf, rf = mse_rf)
write.csv(rlf_cps88wagetune,'rlf_cps88wagetune.csv')

```


