---
title: "Untitled"
author: "Tian Qin"
date: "2024-01-14"
output: html_document
---


### Library
```{r}
library(foreign)
library(RiemannLebesgueForest)
library(randomForest)
library(MASS)
library(caret)
library(gbm)
library(xgboost)

```




### Forestfire

```{r}

# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the forest-fire data and shuffle its rows reproducibly.
forestfires <- read.arff("forest_fire.arff")
set.seed(42)
df <- forestfires[sample(nrow(forestfires)), ]

# Treat the calendar columns as categorical predictors.
df[c("month", "day")] <- lapply(df[c("month", "day")], as.factor)

# Assign every row to one of 10 folds with caret::createFolds() on `area`.
folds <- createFolds(df$area, k = 10, list = FALSE)

```
```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
#cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 13 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -13]
  ytest <- df[testIndexes, 13]
  xtrain <- df[-testIndexes, -13]
  ytrain <- df[-testIndexes, 13]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))

# 
# # 
# rlf_forestfires <- data.frame(lf=mse_rlf,rf=mse_rf)
# write.csv(rlf_forestfires,'rlf_forestfires.csv')
# rlf_forestfires_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_forestfires_time,'rlf_forestfires_time.csv')
```

### Studentperformance

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the student-performance data and shuffle its rows reproducibly.
Studentperformance <- read.arff("Studentperformance.arff")
set.seed(42)
df <- Studentperformance[sample(nrow(Studentperformance)), ]
#df$area <- log(df$area+1)

```
```{r,warning=FALSE}


#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)
# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 33 of df is the response.
# `df` comes from the preceding chunk; the folds are created here on `G3`.
n_folds <- 10
folds <- createFolds(df$G3, k = n_folds, list = FALSE)
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -33]
  ytest <- df[testIndexes, 33]
  xtrain <- df[-testIndexes, -33]
  ytrain <- df[-testIndexes, 33]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))




# rlf_Studentperformance_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_Studentperformance_time,'rlf_Studentperformance_time.csv')
```




### cars

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the cars data and shuffle its rows reproducibly.
cars <- read.arff("cars.arff")
set.seed(42)
df <- cars[sample(nrow(cars)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `Price`.
folds <- createFolds(df$Price, k = 10, list = FALSE)
```
```{r,warning=FALSE}
# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 1 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -1]
  ytest <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_cars_time <-data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_cars_time,'rlf_cars_time.csv')

```


### abalone

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the abalone data and shuffle its rows reproducibly.
abalone <- read.arff("abalone.arff")
set.seed(42)
df <- abalone[sample(nrow(abalone)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `rings`.
folds <- createFolds(df$rings, k = 10, list = FALSE)



```

```{r,warning=FALSE}



# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 9 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -9]
  ytest <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_abalone_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_abalone_time,'rlf_abalone_time.csv')



```


### Kinematics of Robot Arm

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the kin8nm data and shuffle its rows reproducibly.
Kinematics <- read.arff("kin8nm.arff")
set.seed(42)
df <- Kinematics[sample(nrow(Kinematics)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `y`.
folds <- createFolds(df$y, k = 10, list = FALSE)
#folds <- cut(seq(1,nrow(df)),breaks=10,labels=FALSE)



```





```{r,warning=FALSE}

# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 9 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -9]
  ytest <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))


# 
# rlf_kin8nm_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_kin8nm_time,'rlf_kin8nm_time.csv')




```



### naval plant

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the naval-propulsion data and shuffle its rows reproducibly.
naval <- read.arff("naval_propulsion.arff")
set.seed(42)
df <- naval[sample(nrow(naval)), ]

# Assign every row to one of 10 folds with caret::createFolds() on the
# compressor decay coefficient.
folds <- createFolds(
  df$gt_compressor_decay_state_coefficient,
  k = 10,
  list = FALSE
)




```





```{r,warning=FALSE}

# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 15 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -15]
  ytest <- df[testIndexes, 15]
  xtrain <- df[-testIndexes, -15]
  ytrain <- df[-testIndexes, 15]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# 
# rlf_naval_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_naval_time,'rlf_naval_time.csv')
# 


```




### geographical_origin_of_music

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the geographical-origin-of-music data, shuffle its rows reproducibly
# and drop column 118 (longitude, per the original author's note).
geomusic <- read.arff("geomusic.arff")
set.seed(42)
df <- geomusic[sample(nrow(geomusic)), -118]

# Assign every row to one of 10 folds with caret::createFolds() on `latitude`.
folds <- createFolds(df$latitude, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 117 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -117]
  ytest <- df[testIndexes, 117]
  xtrain <- df[-testIndexes, -117]
  ytrain <- df[-testIndexes, 117]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))


# 
# rlf_geomusic_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_geomusic_time,'rlf_geomusic_time.csv')




```

### solar

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the solar-flare data, shuffle its rows reproducibly and drop
# columns 12 and 13.
# NOTE(review): presumably the other flare-class targets - confirm.
solar_flare <- read.arff("solar_flare.arff")
set.seed(42)
df <- solar_flare[sample(nrow(solar_flare)), -c(12, 13)]

# Assign every row to one of 10 folds with caret::createFolds() on
# `c_class_flares`.
folds <- createFolds(df$c_class_flares, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 11 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -11]
  ytest <- df[testIndexes, 11]
  xtrain <- df[-testIndexes, -11]
  ytrain <- df[-testIndexes, 11]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_solar_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_solar_time,'rlf_solar_time.csv')



```



### concrete

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the concrete data and shuffle its rows reproducibly (all columns kept).
concrete <- read.arff("concrete.arff")
set.seed(42)
df <- concrete[sample(nrow(concrete)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `strength`.
folds <- createFolds(df$strength, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 9 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -9]
  ytest <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))

# Persist the per-fold running times to CSV.
rlf_concrete_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_concrete_time, "rlf_concrete_time.csv")



```




### socmob

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the socmob data and shuffle its rows reproducibly (all columns kept).
socmob <- read.arff("socmob.arff")
set.seed(42)
df <- socmob[sample(nrow(socmob)), ]

# Assign every row to one of 10 folds with caret::createFolds() on the
# sons' current-occupation counts.
folds <- createFolds(
  df$counts_for_sons_current_occupation,
  k = 10,
  list = FALSE
)




```





```{r,warning=FALSE}


# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 6 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -6]
  ytest <- df[testIndexes, 6]
  xtrain <- df[-testIndexes, -6]
  ytrain <- df[-testIndexes, 6]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))




# rlf_socmob_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_socmob_time,'rlf_socmob_time.csv')
# 



```


### energy

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the energy data, shuffle its rows reproducibly and drop column 10
# (cooling load, per the original author's note).
energy <- read.arff("energy.arff")
set.seed(42)
df <- energy[sample(nrow(energy)), -10]

# Assign every row to one of 10 folds with caret::createFolds() on
# `heating_load`.
folds <- createFolds(df$heating_load, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 9 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -9]
  ytest <- df[testIndexes, 9]
  xtrain <- df[-testIndexes, -9]
  ytrain <- df[-testIndexes, 9]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))


# 
# rlf_energy_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_energy_time,'rlf_energy_time.csv')




```

### fish

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the fish data and shuffle its rows reproducibly (all columns kept).
fish <- read.arff("fish.arff")
set.seed(42)
df <- fish[sample(nrow(fish)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `LC50`.
folds <- createFolds(df$LC50, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 7 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -7]
  ytest <- df[testIndexes, 7]
  xtrain <- df[-testIndexes, -7]
  ytrain <- df[-testIndexes, 7]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_fish_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_fish_time,'rlf_fish_time.csv')
# 



```


### airfoil

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the airfoil data and shuffle its rows reproducibly (all columns kept).
airfoil <- read.arff("airfoil.arff")
set.seed(42)
df <- airfoil[sample(nrow(airfoil)), ]

# Assign every row to one of 10 folds with caret::createFolds() on
# `sound_pressure`.
folds <- createFolds(df$sound_pressure, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 6 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -6]
  ytest <- df[testIndexes, 6]
  xtrain <- df[-testIndexes, -6]
  ytrain <- df[-testIndexes, 6]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_airfoil_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_airfoil_time,'rlf_airfoil_time.csv')





```



### redwine

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the red-wine data and shuffle its rows reproducibly (all columns kept).
redwine <- read.arff("redwine.arff")
set.seed(42)
df <- redwine[sample(nrow(redwine)), ]

# Assign every row to one of 10 folds with caret::createFolds() on `quality`.
folds <- createFolds(df$quality, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 12 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -12]
  ytest <- df[testIndexes, 12]
  xtrain <- df[-testIndexes, -12]
  ytrain <- df[-testIndexes, 12]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# 
# rlf_redwine_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_redwine_time,'rlf_redwine_time.csv')
# 



```


### auction

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the auction data, shuffle its rows reproducibly and drop column 8.
# NOTE(review): presumably an alternative target column - confirm.
auction <- read.arff("auction.arff")
set.seed(42)
df <- auction[sample(nrow(auction)), -8]

# Assign every row to one of 10 folds with caret::createFolds() on
# `verification.time`.
folds <- createFolds(df$verification.time, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 8 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -8]
  ytest <- df[testIndexes, 8]
  xtrain <- df[-testIndexes, -8]
  ytrain <- df[-testIndexes, 8]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))



# rlf_auction_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_auction_time,'rlf_auction_time.csv')




```


### space_ga

```{r}
# Start from an empty workspace and run the garbage collector.
rm(list = ls())
gc()

# Load the space_ga data and shuffle its rows reproducibly (all columns kept).
space_ga <- read.arff("space_ga.arff")
set.seed(42)
df <- space_ga[sample(nrow(space_ga)), ]

# Assign every row to one of 10 folds with caret::createFolds() on
# `ln_votes_pop`.
folds <- createFolds(df$ln_votes_pop, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validated comparison of the Riemann-Lebesgue forest (RLF)
# and the random forest (RF); column 1 of df is the response.
# `df` and `folds` come from the preceding chunk; n_folds must equal the k
# passed to createFolds() there.
n_folds <- 10
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  testIndexes <- which(folds == i)
  xtest <- df[testIndexes, -1]
  ytest <- df[testIndexes, 1]
  xtrain <- df[-testIndexes, -1]
  ytrain <- df[-testIndexes, 1]

  # RLF: the clock covers fitting and prediction only.
  start.time_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end.time_rlf <- Sys.time()
  mse_rlf[i] <- mean((pred - ytest)^2)
  time_rlf[i] <- difftime(end.time_rlf, start.time_rlf, units = "secs")[[1]]
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock before scoring so both methods are timed identically.
  start.time_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end.time_rf <- Sys.time()
  mse_rf[i] <- mean((pred - ytest)^2)
  time_rf[i] <- difftime(end.time_rf, start.time_rf, units = "secs")[[1]]
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval over the folds.
moe <- function(x) qt(0.975, n_folds - 1) * sd(x) / sqrt(n_folds)

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", moe(mse_rf))
cat("\n margin of error RLF:", moe(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", moe(time_rf))
cat("\n margin of error RLF running time:", moe(time_rlf))


# 
# rlf_space_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_space_time,'rlf_space_time.csv')



```

### whitewine

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

whitewine <- read.arff("whitewine.arff")

# Seed the RNG, shuffle the rows, then give every row one of 10 fold ids
# derived from the quality column.
set.seed(42)
df <- whitewine[sample(nrow(whitewine)), ]
folds <- createFolds(df$quality, k = 10, list = FALSE)




```





```{r,warning=FALSE}

# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 12 of
# `df` (presumably quality, the column `folds` was built from -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 12

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))

# 
# 
# rlf_white_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_white_time,'rlf_white_time.csv')




```



### cpu

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

cpu <- read.arff("cpu.arff")

# Seed the RNG, shuffle the rows, then give every row one of 10 fold ids
# derived from the usr column.
set.seed(42)
df <- cpu[sample(nrow(cpu)), ]
folds <- createFolds(df$usr, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 22 of
# `df` (presumably usr, the column `folds` was built from -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 22

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))



# rlf_cpu_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_cpu_time,'rlf_cpu_time.csv')




```


### pumadyn

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

pumadyn <- read.arff("pumadyn.arff")

# Seed the RNG, shuffle the rows, then give every row one of 10 fold ids
# derived from the thetadd6 column.
set.seed(42)
df <- pumadyn[sample(nrow(pumadyn)), ]
folds <- createFolds(df$thetadd6, k = 10, list = FALSE)




```





```{r,warning=FALSE}



# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 33 of
# `df` (presumably thetadd6, the column `folds` was built from -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 33

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))
cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))




# rlf_pumadyn_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_pumadyn_time,'rlf_pumadyn_time.csv')


```




### grid

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

grid <- read.arff("grid.arff")

# Seed the RNG and shuffle the rows, dropping column 14 (stabf, according to
# the original note). Then give every row one of 10 fold ids derived from the
# stab column.
set.seed(42)
df <- grid[sample(nrow(grid)), -14]
folds <- createFolds(df$stab, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 13 of
# `df` (presumably stab, the column `folds` was built from -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 13

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))

# Persist the per-fold running times of both methods.
rlf_grid_time <- data.frame(lf = time_rlf, rf = time_rf)
write.csv(rlf_grid_time, "rlf_grid_time.csv")




```



### Brazil Housing

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

brazilianhousing <- read.arff("brazilianhousing.arff")

# Seed the RNG and shuffle the rows.
set.seed(42)
df <- brazilianhousing[sample(nrow(brazilianhousing)), ]

# Work with the natural log of the total column, then give every row one of
# 10 fold ids derived from that transformed column.
df$total <- log(df$total)
folds <- createFolds(df$total, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 13 of
# `df` (presumably the log-transformed total column -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 13

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))


# 
# rlf_brazilianhousing_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_brazilianhousing_time,'rlf_brazilianhousing_time.csv')
# 



```






### Miami Housing

```{r}
# Reset the workspace before loading the next data set.
rm(list = ls())
gc()

miamihousing <- read.arff("miamihousing.arff")

# Seed the RNG and shuffle the rows.
set.seed(42)
df <- miamihousing[sample(nrow(miamihousing)), ]

# Work with the natural log of the SALE_PRC column, then give every row one of
# 10 fold ids derived from that transformed column.
df$SALE_PRC <- log(df$SALE_PRC)
folds <- createFolds(df$SALE_PRC, k = 10, list = FALSE)




```





```{r,warning=FALSE}


# 10-fold cross-validation of RLForest (RLF) against randomForest (RF).
# Uses `df` and `folds` from the preceding chunk. The response is column 4 of
# `df` (presumably the log-transformed SALE_PRC column -- confirm).
n_folds <- 10      # must equal the k passed to createFolds()
target_col <- 4

# Preallocate the per-fold results instead of growing empty vectors.
mse_rf <- numeric(n_folds)
mse_rlf <- numeric(n_folds)
time_rf <- numeric(n_folds)
time_rlf <- numeric(n_folds)

for (i in seq_len(n_folds)) {
  # A logical mask keeps the split correct even if a fold were empty
  # (negative indexing with an empty index vector would select no rows).
  is_test <- folds == i
  xtest <- df[is_test, -target_col, drop = FALSE]
  ytest <- df[is_test, target_col]
  xtrain <- df[!is_test, -target_col, drop = FALSE]
  ytrain <- df[!is_test, target_col]

  # RLF: the timed window covers fitting and prediction only.
  start_rlf <- Sys.time()
  rlf <- RLForest(ytrain ~ ., data = xtrain, ntree = 100, ntreesub = 10,
                  replace = FALSE, Lp = 0.8, nodesize = 1)
  pred <- predict(rlf, newdata = xtest)
  end_rlf <- Sys.time()
  time_rlf[i] <- as.numeric(difftime(end_rlf, start_rlf, units = "secs"))
  mse_rlf[i] <- mean((pred - ytest)^2)
  print(mse_rlf[i])
  rm(rlf)

  # RF: stop the clock right after predict(), before scoring, so that both
  # methods are timed over exactly the same kind of work.
  start_rf <- Sys.time()
  rf <- randomForest(ytrain ~ ., data = xtrain, ntree = 100, nodesize = 1)
  pred <- predict(rf, newdata = xtest)
  end_rf <- Sys.time()
  time_rf[i] <- as.numeric(difftime(end_rf, start_rf, units = "secs"))
  mse_rf[i] <- mean((pred - ytest)^2)
  print(mse_rf[i])

  # Release the per-fold objects before the next iteration.
  rm(rf, xtest, ytest, xtrain, ytrain, pred)
  gc()
}

# Half-width of the 95% t-interval for the mean of the per-fold values.
margin_of_error <- function(x) {
  qt(0.975, df = length(x) - 1) * sd(x) / sqrt(length(x))
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of RLF:", mean(mse_rlf))

cat("\n margin of error of RF:", margin_of_error(mse_rf))
cat("\n margin of error LF:", margin_of_error(mse_rlf))

cat("\n RF running time:", mean(time_rf))
cat("\n RLF running time:", mean(time_rlf))

cat("\n margin of error of RF running time:", margin_of_error(time_rf))
cat("\n margin of error RLF running time:", margin_of_error(time_rlf))



# 
# rlf_miamihousing_time <- data.frame(lf=time_rlf,rf=time_rf)
# write.csv(rlf_miamihousing_time,'rlf_miamihousing_time.csv')





```




