---
title: "Untitled"
author: "Tian Qin"
date: "2024-01-14"
output: html_document
---


### Library
```{r}
library(foreign)
library(RiemannLebesgueForest)
library(randomForest)
library(MASS)
library(caret)
library(gbm)
library(xgboost)

```




### Forestfire

```{r}

# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Forest-fire data in ARFF format.
forestfires <- read.arff(file = "forest_fire.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
set.seed(42)
df <- forestfires[sample.int(nrow(forestfires)), ]

# month and day are used as categorical predictors.
df[c("month", "day")] <- lapply(df[c("month", "day")], as.factor)

# One fold id per row (k = 10), built by caret from the response `area`.
folds <- createFolds(y = df$area, k = 10, list = FALSE)

```
```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
#cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the forest-fire data. Uses `df` and `folds` from the
# previous chunk; column 13 is the response.
target_col <- 13
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Optional: uncomment to also save the per-fold MSEs.
# rlf_forestfires <- data.frame(lf=mse_rlf,rf=mse_rf)
# write.csv(rlf_forestfires,'rlf_forestfires.csv')

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_forestfires_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_forestfires_time, "rlf_forestfires_time.csv")
```

### Studentperformance

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Student-performance data in ARFF format.
Studentperformance <- read.arff(file = "Studentperformance.arff")

# Fixed seed: the row shuffle (and the fold assignment made in the next
# chunk) are reproducible.
set.seed(42)
df <- Studentperformance[sample.int(nrow(Studentperformance)), ]
#df$area <- log(df$area+1)

```
```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the student-performance data. Uses `df` from the
# previous chunk; column 33 is the response.

# One fold id per row (k = 10), built by caret from `G3`.
folds <- createFolds(df$G3, k = 10, list = FALSE)
target_col <- 33
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_Studentperformance_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_Studentperformance_time, "rlf_Studentperformance_time.csv")
```




### cars

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Cars data in ARFF format.
cars <- read.arff(file = "cars.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
set.seed(42)
df <- cars[sample.int(nrow(cars)), ]

# One fold id per row (k = 10), built by caret from the response `Price`.
folds <- createFolds(y = df$Price, k = 10, list = FALSE)
```
```{r,warning=FALSE}
# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the cars data. Uses `df` and `folds` from the previous
# chunk; column 1 is the response.
target_col <- 1
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
# (The unused gbm/xgboost accumulators were removed.)
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_cars_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_cars_time, "rlf_cars_time.csv")

```


### abalone

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Abalone data in ARFF format.
abalone <- read.arff(file = "abalone.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
set.seed(42)
df <- abalone[sample.int(nrow(abalone)), ]

# One fold id per row (k = 10), built by caret from the response `rings`.
folds <- createFolds(y = df$rings, k = 10, list = FALSE)



```

```{r,warning=FALSE}



# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the abalone data. Uses `df` and `folds` from the
# previous chunk; column 9 is the response.
target_col <- 9
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_abalone_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_abalone_time, "rlf_abalone_time.csv")



```


### Kinematics of Robot Arm

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# kin8nm (robot-arm kinematics) data in ARFF format.
Kinematics <- read.arff(file = "kin8nm.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
set.seed(42)
df <- Kinematics[sample.int(nrow(Kinematics)), ]

# One fold id per row (k = 10), built by caret from the response `y`.
folds <- createFolds(y = df$y, k = 10, list = FALSE)
#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)



```





```{r,warning=FALSE}

# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the kin8nm data. Uses `df` and `folds` from the
# previous chunk; column 9 is the response.
target_col <- 9
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_kin8nm_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_kin8nm_time, "rlf_kin8nm_time.csv")




```



### naval plant

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Naval-propulsion data in ARFF format.
naval <- read.arff(file = "naval_propulsion.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
set.seed(42)
df <- naval[sample.int(nrow(naval)), ]

# One fold id per row (k = 10), built by caret from the response
# `gt_compressor_decay_state_coefficient`.
folds <- createFolds(
  y = df$gt_compressor_decay_state_coefficient, k = 10, list = FALSE
)




```





```{r,warning=FALSE}

# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the naval-propulsion data. Uses `df` and `folds` from
# the previous chunk; column 15 is the response.
target_col <- 15
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_naval_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_naval_time, "rlf_naval_time.csv")



```




### geographical_origin_of_music

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Geographical-origin-of-music data in ARFF format.
geomusic <- read.arff(file = "geomusic.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Column 118 (longitude) is dropped.
set.seed(42)
df <- geomusic[sample.int(nrow(geomusic)), -118]

# One fold id per row (k = 10), built by caret from the response `latitude`.
folds <- createFolds(y = df$latitude, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the geomusic data. Uses `df` and `folds` from the
# previous chunk; column 117 is the response.
target_col <- 117
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_geomusic_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_geomusic_time, "rlf_geomusic_time.csv")




```

### solar

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Solar-flare data in ARFF format.
solar_flare <- read.arff(file = "solar_flare.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Columns 12 and 13 are dropped.
# NOTE(review): the earlier "remove longitude" remark was copied from the
# geomusic section; presumably these are the other flare-class counts --
# TODO confirm.
set.seed(42)
df <- solar_flare[sample.int(nrow(solar_flare)), -c(12, 13)]

# One fold id per row (k = 10), built by caret from the response
# `c_class_flares`.
folds <- createFolds(y = df$c_class_flares, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the solar-flare data. Uses `df` and `folds` from the
# previous chunk; column 11 is the response.
target_col <- 11
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_solar_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_solar_time, "rlf_solar_time.csv")



```



### concrete

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Concrete data in ARFF format.
concrete <- read.arff(file = "concrete.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- concrete[sample.int(nrow(concrete)), ]

# One fold id per row (k = 10), built by caret from the response `strength`.
folds <- createFolds(y = df$strength, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the concrete data. Uses `df` and `folds` from the
# previous chunk; column 9 is the response.
target_col <- 9
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_concrete_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_concrete_time, "rlf_concrete_time.csv")



```




### socmob

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Social-mobility (socmob) data in ARFF format.
socmob <- read.arff(file = "socmob.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- socmob[sample.int(nrow(socmob)), ]

# One fold id per row (k = 10), built by caret from the response
# `counts_for_sons_current_occupation`.
folds <- createFolds(
  y = df$counts_for_sons_current_occupation, k = 10, list = FALSE
)




```





```{r,warning=FALSE}


# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the socmob data. Uses `df` and `folds` from the
# previous chunk; column 6 is the response.
target_col <- 6
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_socmob_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_socmob_time, "rlf_socmob_time.csv")




```


### energy

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Energy-efficiency data in ARFF format.
energy <- read.arff(file = "energy.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Column 10 (cooling load) is dropped.
set.seed(42)
df <- energy[sample.int(nrow(energy)), -10]

# One fold id per row (k = 10), built by caret from the response
# `heating_load`.
folds <- createFolds(y = df$heating_load, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the energy data. Uses `df` and `folds` from the
# previous chunk; column 9 is the response.
target_col <- 9
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_energy_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_energy_time, "rlf_energy_time.csv")




```

### fish

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Fish data in ARFF format.
fish <- read.arff(file = "fish.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- fish[sample.int(nrow(fish)), ]

# One fold id per row (k = 10), built by caret from the response `LC50`.
folds <- createFolds(y = df$LC50, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the fish data. Uses `df` and `folds` from the previous
# chunk; column 7 is the response.
target_col <- 7
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_fish_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_fish_time, "rlf_fish_time.csv")




```


### airfoil

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Airfoil data in ARFF format.
airfoil <- read.arff(file = "airfoil.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- airfoil[sample.int(nrow(airfoil)), ]

# One fold id per row (k = 10), built by caret from the response
# `sound_pressure`.
folds <- createFolds(y = df$sound_pressure, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the airfoil data. Uses `df` and `folds` from the
# previous chunk; column 6 is the response.
target_col <- 6
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
# (The unused gbm/xgboost accumulators were removed.)
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_airfoil_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_airfoil_time, "rlf_airfoil_time.csv")





```



### redwine

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Red-wine data in ARFF format.
redwine <- read.arff(file = "redwine.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- redwine[sample.int(nrow(redwine)), ]

# One fold id per row (k = 10), built by caret from the response `quality`.
folds <- createFolds(y = df$quality, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the red-wine data. Uses `df` and `folds` from the
# previous chunk; column 12 is the response.
target_col <- 12
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
# (The unused gbm/xgboost accumulators were removed.)
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_redwine_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_redwine_time, "rlf_redwine_time.csv")




```


### auction

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# Auction-verification data in ARFF format.
auction <- read.arff(file = "auction.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Column 8 is dropped.
# NOTE(review): the earlier "remove cooling load" remark was copied from the
# energy section; presumably column 8 is the other verification outcome --
# TODO confirm.
set.seed(42)
df <- auction[sample.int(nrow(auction)), -8]

# One fold id per row (k = 10), built by caret from the response
# `verification.time`.
folds <- createFolds(y = df$verification.time, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the auction data. Uses `df` and `folds` from the
# previous chunk; column 8 of the reduced `df` is the response.
target_col <- 8
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_auction_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_auction_time, "rlf_auction_time.csv")




```


### space_ga

```{r}
# Clear the workspace and free memory before loading this data set.
rm(list = ls())
gc()

# space_ga data in ARFF format.
space_ga <- read.arff(file = "space_ga.arff")

# Fixed seed: the row shuffle and the fold assignment below are reproducible.
# Every column is kept; only the row order changes.
set.seed(42)
df <- space_ga[sample.int(nrow(space_ga)), ]

# One fold id per row (k = 10), built by caret from the response
# `ln_votes_pop`.
folds <- createFolds(y = df$ln_votes_pop, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# Cross-validated comparison of Riemann-Lebesgue Forest (RLF) and
# randomForest (RF) on the space_ga data. Uses `df` and `folds` from the
# previous chunk; column 1 is the response.
target_col <- 1
n_folds <- max(folds)

# Preallocate one slot per fold rather than growing vectors in the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -target_col]
  ytest <- df[testIndexes, target_col]
  xtrain <- df[-testIndexes, -target_col]
  ytrain <- df[-testIndexes, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict() so both methods are timed over
  # the same work (scoring is excluded for both).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean over the folds.
fold_moe <- function(x) qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))
cat("\n margin of error of RF:", fold_moe(mse_rf))
cat("\n margin of error RLF:", fold_moe(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))
cat("\n margin of error of RF running time:", fold_moe(time_rf))
cat("\n margin of error RLF running time:", fold_moe(time_rlf))

# Save per-fold running times in seconds; column `lf` holds the RLF values.
rlf_space_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_space_time, "rlf_space_time.csv")



```

### whitewine

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

whitewine <- read.arff("whitewine.arff")

# Fix the RNG seed, then put the rows in random order; every column is kept.
set.seed(42)
row_order <- sample(nrow(whitewine))
df <- whitewine[row_order, ]

# One fold id per row of df, built by caret from the quality column.
folds <- createFolds(y = df$quality, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_white_time.csv'.
n_folds <- 10
target_col <- 12

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))
cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_white_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_white_time, 'rlf_white_time.csv')




```



### cpu

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

cpu <- read.arff("cpu.arff")

# Fix the RNG seed, then put the rows in random order; every column is kept.
set.seed(42)
row_order <- sample(nrow(cpu))
df <- cpu[row_order, ]

# One fold id per row of df, built by caret from the usr column.
folds <- createFolds(y = df$usr, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_cpu_time.csv'.
n_folds <- 10
target_col <- 22

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_cpu_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_cpu_time, 'rlf_cpu_time.csv')




```


### pumadyn

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

pumadyn <- read.arff("pumadyn.arff")

# Fix the RNG seed, then put the rows in random order; every column is kept.
set.seed(42)
row_order <- sample(nrow(pumadyn))
df <- pumadyn[row_order, ]

# One fold id per row of df, built by caret from the thetadd6 column.
folds <- createFolds(y = df$thetadd6, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_pumadyn_time.csv'.
n_folds <- 10
target_col <- 33

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))
cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_pumadyn_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_pumadyn_time, 'rlf_pumadyn_time.csv')


```




### grid

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

grid <- read.arff("grid.arff")

# Fix the RNG seed, then put the rows in random order. Column 14 is dropped
# (the original note identifies it as stabf).
set.seed(42)
row_order <- sample(nrow(grid))
df <- grid[row_order, -14]

# One fold id per row of df, built by caret from the stab column.
folds <- createFolds(y = df$stab, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_grid_time.csv'.
n_folds <- 10
target_col <- 13

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_grid_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_grid_time, 'rlf_grid_time.csv')




```



### Brazil Housing

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

brazilianhousing <- read.arff("brazilianhousing.arff")

# Fix the RNG seed, then put the rows in random order; every column is kept.
set.seed(42)
row_order <- sample(nrow(brazilianhousing))
df <- brazilianhousing[row_order, ]

# The total column is replaced by its natural logarithm.
log_total <- log(df$total)
df$total <- log_total

# One fold id per row of df, built by caret from the log-transformed total.
folds <- createFolds(y = df$total, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_brazilianhousing_time.csv'.
n_folds <- 10
target_col <- 13

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_brazilianhousing_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_brazilianhousing_time, 'rlf_brazilianhousing_time.csv')




```






### Miami Housing

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

miamihousing <- read.arff("miamihousing.arff")

# Fix the RNG seed, then put the rows in random order; every column is kept.
set.seed(42)
row_order <- sample(nrow(miamihousing))
df <- miamihousing[row_order, ]

# The SALE_PRC column is replaced by its natural logarithm.
log_sale_prc <- log(df$SALE_PRC)
df$SALE_PRC <- log_sale_prc

# One fold id per row of df, built by caret from the log-transformed SALE_PRC.
folds <- createFolds(y = df$SALE_PRC, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_miamihousing_time.csv'.
n_folds <- 10
target_col <- 4

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_miamihousing_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_miamihousing_time, 'rlf_miamihousing_time.csv')





```




### Fifa

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

fifa <- read.arff("fifa.arff")

# Fix the RNG seed, then put the rows in random order. Column 5 is dropped:
# per the original note it is nationality, and RF can't handle categorical
# predictors with more than 53 categories.
set.seed(42)
row_order <- sample(nrow(fifa))
df <- fifa[row_order, -5]

# The wage_eur column is replaced by its natural logarithm.
log_wage_eur <- log(df$wage_eur)
df$wage_eur <- log_wage_eur

# One fold id per row of df, built by caret from the log-transformed wage_eur.
folds <- createFolds(y = df$wage_eur, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_fifa_time.csv'.
n_folds <- 10
target_col <- 1

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_fifa_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_fifa_time, 'rlf_fifa_time.csv')



```


### kingscounty

```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

kingscounty <- read.arff("kingscounty.arff")

# Fix the RNG seed, then put the rows in random order. Column 15 is dropped:
# per the original note it is the zipcode variable, and RF can't handle
# categorical predictors with more than 53 categories.
set.seed(42)
row_order <- sample(nrow(kingscounty))
df <- kingscounty[row_order, -15]

# The price column is replaced by its natural logarithm.
log_price <- log(df$price)
df$price <- log_price

# One fold id per row of df, built by caret from the log-transformed price.
folds <- createFolds(y = df$price, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_kingscounty_time.csv'.
n_folds <- 10
target_col <- 1

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_kingscounty_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_kingscounty_time, 'rlf_kingscounty_time.csv')


```




### California housing price
```{r}

# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

# Fix the RNG seed before loading and shuffling the data.
set.seed(42)
calihou <- read.arff('calinforniahousing.arff')

# Put the rows in random order; every column is kept.
row_order <- sample(nrow(calihou))
df <- calihou[row_order, ]

# The medianHouseValue column is replaced by its natural logarithm.
log_median_value <- log(df$medianHouseValue)
df$medianHouseValue <- log_median_value

# One fold id per row of df, built by caret from the log-transformed
# medianHouseValue.
folds <- createFolds(y = df$medianHouseValue, k = 10, list = FALSE)

```

```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_calihou_time.csv'.
n_folds <- 10
target_col <- 9

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_calihou_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_calihou_time, 'rlf_calihou_time.csv')


```

### superconductivity
```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

# Fix the RNG seed before loading and shuffling the data.
set.seed(42)
superconductivity <- read.arff('superconductivity.arff')

# Put the rows in random order; every column is kept.
row_order <- sample(nrow(superconductivity))
df <- superconductivity[row_order, ]

# One fold id per row of df, built by caret from the critical_temp column.
folds <- createFolds(y = df$critical_temp, k = 10, list = FALSE)

```

```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)

# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_superconductivity_time.csv'.
n_folds <- 10
target_col <- 82

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_superconductivity_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_superconductivity_time, 'rlf_superconductivity_time.csv')


```


### insurance
```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

# Fix the RNG seed before loading and shuffling the data.
set.seed(42)
healthinsurance <- read.arff('health_insurance.arff')

# Put the rows in random order. Column 13 is dropped (the original note
# identifies it as the sampling weight).
row_order <- sample(nrow(healthinsurance))
df <- healthinsurance[row_order, -13]

# One fold id per row of df, built by caret from the whrswk column.
folds <- createFolds(y = df$whrswk, k = 10, list = FALSE)

```

```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_healthinsurance_time.csv'.
n_folds <- 10
target_col <- 1

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_healthinsurance_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_healthinsurance_time, 'rlf_healthinsurance_time.csv')



```


### cpswage
```{r}
# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

# Fix the RNG seed before loading and shuffling the data.
set.seed(42)
cps88wage <- read.arff('cps88wage.arff')

# Put the rows in random order; every column is kept.
row_order <- sample(nrow(cps88wage))
df <- cps88wage[row_order, ]

# The wage column is replaced by its natural logarithm.
log_wage <- log(df$wage)
df$wage <- log_wage

# One fold id per row of df, built by caret from the log-transformed wage.
folds <- createFolds(y = df$wage, k = 10, list = FALSE)

```

```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_cps88wage_time.csv'.
n_folds <- 10
target_col <- 1

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_cps88wage_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_cps88wage_time, 'rlf_cps88wage_time.csv')


```


### protein
```{r}

# Start from an empty workspace and release the memory held by the
# previous experiment.
rm(list = ls())
gc()

# Fix the RNG seed before loading and shuffling the data.
set.seed(42)
protein <- read.arff('protein.arff')

# Put the rows in random order; every column is kept.
row_order <- sample(nrow(protein))
df <- protein[row_order, ]

# One fold id per row of df, built by caret from the RMSD column.
folds <- createFolds(y = df$RMSD, k = 10, list = FALSE)

```

```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_protein_time.csv'.
n_folds <- 10
target_col <- 1

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_protein_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_protein_time, 'rlf_protein_time.csv')


```


### sarcos
```{r}
# Load the sarcos data, shuffle its rows and assign every row to one of 10
# cross-validation folds (consumed by the benchmark chunk that follows).

#clean up memory
rm(list=ls())
gc()

# Seed the RNG before shuffling, as every other dataset section does;
# without it the row order and the fold assignment change on every run.
set.seed(42)
sarcos <- read.arff('sarcos.arff')

# Put the rows in random order; every column is kept.
df <- sarcos[sample(nrow(sarcos)), ]

# One fold id per row of df, built by caret from the V22 column.
folds <- createFolds(df$V22, k = 10, list = FALSE)

```

```{r,warning=FALSE}


# 10-fold cross-validation benchmark of RLForest (RLF) against randomForest
# (RF). Relies on `df` and `folds` created in the preceding chunk; column
# `target_col` of `df` is used as the response and all other columns as
# predictors. Prints per-fold and summary test MSE and running time, and
# saves the per-fold timings to 'rlf_sarcosn_time.csv'.
n_folds <- 10
target_col <- 22

# Preallocate the per-fold results instead of growing them inside the loop.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

# Half-width of the 95% t-interval for the mean of the per-fold values in x.
margin_of_error <- function(x) {
  qt(0.975, length(x) - 1) * sd(x) / sqrt(length(x))
}

for (i in seq_len(n_folds)) {
  test_idx <- which(folds == i)
  xtest <- df[test_idx, -target_col]
  ytest <- df[test_idx, target_col]
  xtrain <- df[-test_idx, -target_col]
  ytrain <- df[-test_idx, target_col]

  # RLF: the timer covers model fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 1,
                  replace = FALSE, Lp = 0.8)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  rm(rlf)

  # RF: stop the timer right after predict(), as for RLF, so that both
  # timings cover the same work (the MSE step is deliberately excluded).
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

cat('\nMSE of RF:', mean(mse_rf))
cat('\nMSE of RLF:', mean(mse_rlf))

cat('\n margin of error of RF:', margin_of_error(mse_rf))
cat('\n margin of error RLF:', margin_of_error(mse_rlf))

cat('\n RF running time:', mean(time_rf))
cat('\n RLF running time:', mean(time_rlf))

cat('\n margin of error of RF running time:', margin_of_error(time_rf))
cat('\n margin of error RLF running time:', margin_of_error(time_rlf))

# Keep the per-fold timings (seconds) of both methods for later comparison.
rlf_sarcosn_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_sarcosn_time, 'rlf_sarcosn_time.csv')




```



