# Random-forest error-rate exploration for the data in glass.csv.
#
# Fits one 500-tree forest with the default mtry and plots its out-of-bag
# (OOB) error against the number of trees, then refits with 200 trees for
# every mtry from 1 to the number of predictors and draws one error curve
# per setting.

# include the package
library(randomForest)

# Directory holding glass.csv. The file is read through file.path() instead
# of calling setwd(), so running the script does not change the caller's
# working directory.
data_dir <- "/home/ekrem/Project2/Data"

# define a variable for error rates
# (not referenced again in this section; kept in case later code uses it)
errorRates <- matrix(0, 1, 500)

# load the dataset
mydata <- read.table(
  file.path(data_dir, "glass.csv"),
  header = TRUE,
  sep = ","
)

# Columns 2-10 are the predictors, column 11 is the response.
predictors <- mydata[, 2:10]

# randomForest() performs classification (and fills in `err.rate`) only when
# the response is a factor; a numeric response silently triggers regression.
# as.factor() is a no-op if the column was already read as a factor.
glass_class <- as.factor(mydata[, 11])

# Fit a forest of `ntree` trees on x / y with importance and proximity
# enabled. Extra arguments (e.g. mtry) are forwarded to randomForest().
# The former `type = "classification"` argument was dropped: it is not a
# randomForest() parameter, the task is decided by the class of `y`.
fit_forest <- function(x, y, ntree, ...) {
  randomForest(
    x = x,
    y = y,
    importance = TRUE,
    proximity = TRUE,
    ntree = ntree,
    ...
  )
}

# Plot the first column of an `err.rate` matrix (the overall OOB error)
# against the tree index and add a red title.
# NOTE(review): font.main = 16 is kept from the original; base graphics
# document fonts 1-5 only, so confirm this value is intended.
plot_oob_error <- function(err_rate, main, ylim, xlim) {
  plot(
    err_rate[, 1],
    type = "o",
    col = "red",
    xlab = "Iteration",
    ylab = "Error Rate",
    ylim = ylim,
    xlim = xlim
  )
  title(main = main, col.main = "red", font.main = 16)
}

# ---- Baseline forest: 500 trees, default mtry ----

# start fitting random forests
mydata.rf <- fit_forest(predictors, glass_class, ntree = 500)
myErrRate <- mydata.rf[["err.rate"]]

# below line is for the importance of variables
# (explicit print() so the table also shows up when the script is source()d)
print(round(importance(mydata.rf), 2))

plot_oob_error(
  myErrRate,
  main = "Random Forest Error Rates",
  ylim = c(0, 0.7),
  xlim = c(0, 550)
)
# To overlay the per-class error rates (columns 2 onwards), uncomment:
# for (k in seq_len(ncol(myErrRate))[-1]) {
#   lines(myErrRate[, k], type = "o", col = "blue")
# }

# The legend is anchored at the top of the plotted error range. `g_range`
# was previously used without being defined, which aborted the script here.
g_range <- range(0, myErrRate[, 1], na.rm = TRUE)
legend(
  1, g_range[2],
  "Err. Rate",
  cex = 0.8,
  col = "red",
  pch = 21,
  lty = 1
)

# ---- mtry sweep: 200 trees for each mtry = 1..number of predictors ----

# start fitting random forests
mtry_values <- seq_len(ncol(predictors))
mtry_fits <- lapply(
  mtry_values,
  function(m) fit_forest(predictors, glass_class, ntree = 200, mtry = m)
)

for (i in seq_along(mtry_values)) {
  m <- mtry_values[i]
  fit <- mtry_fits[[i]]

  # Keep the historical per-mtry globals (mydata.rf1, myErrRate1, ...) so
  # any code that still refers to them keeps working.
  assign(paste0("mydata.rf", m), fit)
  assign(paste0("myErrRate", m), fit[["err.rate"]])

  plot_oob_error(
    fit[["err.rate"]],
    main = paste("Error Rates for mtry =", m),
    ylim = c(0, 0.5),
    xlim = c(0, 250)
  )
}