library(foreign)
library(RiemannLebesgueForest)
library(randomForest)
library(MASS)
library(caret)
# ---- Forest fires: 10-fold CV, RLForest vs randomForest ----
# Clean up memory from any previous experiment.
rm(list = ls())
gc()

forestfires <- read.arff("forest_fire.arff")
set.seed(42)

# Shuffle rows, then build stratified fold assignments on the response
# (createFolds with list = FALSE returns one fold id per row).
df <- forestfires[sample(nrow(forestfires)), ]
k <- 10
folds <- createFolds(df$area, k = k, list = FALSE)

# Preallocate per-fold MSE vectors instead of growing them with c().
mse_rf <- numeric(k)
mse_lf <- numeric(k)

for (i in seq_len(k)) {
  test_idx <- folds == i
  train <- df[!test_idx, ]
  test <- df[test_idx, ]
  ytest <- test$area

  # Riemann-Lebesgue forest. Fit with the named response on the full
  # training frame rather than a detached ytrain vector, so the formula
  # does not depend on environment lookup.
  rlf <- RLForest(area ~ ., data = train, ntree = 100, ntreesub = 10,
                  replace = FALSE)
  mse_lf[i] <- mean((predict(rlf, newdata = test) - ytest)^2)
  print(mse_lf[i])
  rm(rlf)

  # Baseline random forest with the same number of trees.
  rf <- randomForest(area ~ ., data = train, ntree = 100)
  mse_rf[i] <- mean((predict(rf, newdata = test) - ytest)^2)
  print(mse_rf[i])

  # Free the fold's objects before the next iteration.
  rm(rf, train, test, ytest)
  gc()
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of LF:", mean(mse_lf))
# 95% t-interval half-width of the mean MSE over the k folds.
cat("\n margin of error of RF:", qt(0.975, k - 1) * sd(mse_rf) / sqrt(k))
cat("\n margin of error LF:", qt(0.975, k - 1) * sd(mse_lf) / sqrt(k))

rlf_forestfires <- data.frame(lf = mse_lf, rf = mse_rf)
write.csv(rlf_forestfires, 'rlf_forestfires.csv')
# ---- Student performance: 10-fold CV, RLForest vs randomForest ----
# Clean up memory from any previous experiment.
rm(list = ls())
gc()

Studentperformance <- read.arff("Studentperformance.arff")
set.seed(42)

# Shuffle rows, then build stratified fold assignments on the final
# grade G3 (createFolds with list = FALSE returns one fold id per row).
df <- Studentperformance[sample(nrow(Studentperformance)), ]
k <- 10
folds <- createFolds(df$G3, k = k, list = FALSE)

# Preallocate per-fold MSE vectors instead of growing them with c().
mse_rf <- numeric(k)
mse_lf <- numeric(k)

for (i in seq_len(k)) {
  test_idx <- folds == i
  train <- df[!test_idx, ]
  test <- df[test_idx, ]
  ytest <- test$G3

  # Riemann-Lebesgue forest. Fit with the named response on the full
  # training frame rather than a detached ytrain vector, so the formula
  # does not depend on environment lookup.
  rlf <- RLForest(G3 ~ ., data = train, ntree = 100, ntreesub = 10,
                  replace = FALSE)
  mse_lf[i] <- mean((predict(rlf, newdata = test) - ytest)^2)
  print(mse_lf[i])
  rm(rlf)

  # Baseline random forest with the same number of trees.
  rf <- randomForest(G3 ~ ., data = train, ntree = 100)
  mse_rf[i] <- mean((predict(rf, newdata = test) - ytest)^2)
  print(mse_rf[i])

  # Free the fold's objects before the next iteration.
  rm(rf, train, test, ytest)
  gc()
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of LF:", mean(mse_lf))
# 95% t-interval half-width of the mean MSE over the k folds.
cat("\n margin of error of RF:", qt(0.975, k - 1) * sd(mse_rf) / sqrt(k))
cat("\n margin of error LF:", qt(0.975, k - 1) * sd(mse_lf) / sqrt(k))

rlf_Studentperformance <- data.frame(lf = mse_lf, rf = mse_rf)
write.csv(rlf_Studentperformance, 'rlf_Studentperformance.csv')
# ---- Cars: 10-fold CV, RLForest vs randomForest ----
# Clean up memory from any previous experiment.
rm(list = ls())
gc()

cars <- read.arff("cars.arff")
set.seed(42)

# Shuffle rows, then build stratified fold assignments on the response
# Price (createFolds with list = FALSE returns one fold id per row).
df <- cars[sample(nrow(cars)), ]
k <- 10
folds <- createFolds(df$Price, k = k, list = FALSE)

# Preallocate per-fold MSE vectors instead of growing them with c().
mse_rf <- numeric(k)
mse_lf <- numeric(k)

for (i in seq_len(k)) {
  test_idx <- folds == i
  train <- df[!test_idx, ]
  test <- df[test_idx, ]
  ytest <- test$Price

  # Riemann-Lebesgue forest. Fit with the named response on the full
  # training frame rather than a detached ytrain vector, so the formula
  # does not depend on environment lookup.
  rlf <- RLForest(Price ~ ., data = train, ntree = 100, ntreesub = 10,
                  replace = FALSE)
  mse_lf[i] <- mean((predict(rlf, newdata = test) - ytest)^2)
  print(mse_lf[i])
  rm(rlf)

  # Baseline random forest with the same number of trees.
  rf <- randomForest(Price ~ ., data = train, ntree = 100)
  mse_rf[i] <- mean((predict(rf, newdata = test) - ytest)^2)
  print(mse_rf[i])

  # Free the fold's objects before the next iteration.
  rm(rf, train, test, ytest)
  gc()
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of LF:", mean(mse_lf))
# 95% t-interval half-width of the mean MSE over the k folds.
cat("\n margin of error of RF:", qt(0.975, k - 1) * sd(mse_rf) / sqrt(k))
cat("\n margin of error LF:", qt(0.975, k - 1) * sd(mse_lf) / sqrt(k))

rlf_cars <- data.frame(lf = mse_lf, rf = mse_rf)
write.csv(rlf_cars, 'rlf_cars.csv')
# ---- Abalone: 10-fold CV, RLForest vs randomForest ----
# Clean up memory from any previous experiment.
rm(list = ls())
gc()

abalone <- read.arff("abalone.arff")
set.seed(42)

# Shuffle rows, then build stratified fold assignments on the response
# rings (createFolds with list = FALSE returns one fold id per row).
df <- abalone[sample(nrow(abalone)), ]
k <- 10
folds <- createFolds(df$rings, k = k, list = FALSE)

# Preallocate per-fold MSE vectors instead of growing them with c().
mse_rf <- numeric(k)
mse_lf <- numeric(k)

for (i in seq_len(k)) {
  test_idx <- folds == i
  train <- df[!test_idx, ]
  test <- df[test_idx, ]
  ytest <- test$rings

  # Riemann-Lebesgue forest. Fit with the named response on the full
  # training frame rather than a detached ytrain vector, so the formula
  # does not depend on environment lookup.
  rlf <- RLForest(rings ~ ., data = train, ntree = 100, ntreesub = 10,
                  replace = FALSE)
  mse_lf[i] <- mean((predict(rlf, newdata = test) - ytest)^2)
  print(mse_lf[i])
  rm(rlf)

  # Baseline random forest with the same number of trees.
  rf <- randomForest(rings ~ ., data = train, ntree = 100)
  mse_rf[i] <- mean((predict(rf, newdata = test) - ytest)^2)
  print(mse_rf[i])

  # Free the fold's objects before the next iteration.
  rm(rf, train, test, ytest)
  gc()
}

cat("\nMSE of RF:", mean(mse_rf))
cat("\nMSE of LF:", mean(mse_lf))
# 95% t-interval half-width of the mean MSE over the k folds.
cat("\n margin of error of RF:", qt(0.975, k - 1) * sd(mse_rf) / sqrt(k))
cat("\n margin of error LF:", qt(0.975, k - 1) * sd(mse_lf) / sqrt(k))

rlf_abalone <- data.frame(lf = mse_lf, rf = mse_rf)
write.csv(rlf_abalone, 'rlf_abalone.csv')
