#####
## Set up data
#####
# 2000 observations of 10 predictors, each drawn from a chi-squared
# distribution with 10 degrees of freedom.
x <- matrix(rchisq(2000 * 10, 10), 2000)
# Response: row-wise sum of the squared predictors, shifted by a constant.
xs <- rowSums(x^2)
y <- xs - 9.34
dat <- data.frame(y = y, x)

#####
## Perform boosting
#####
library(rpart)

LValue <- 500         # number of boosting iterations
learningRate <- 0.75  # shrinkage applied to every update

# First half of the rows is the training set, the remainder the test set.
# Integer division keeps the split valid when the row count is odd.
n1 <- nrow(x)
n2 <- n1 %/% 2
tr.idx <- seq_len(n2)
te.idx <- n2 + seq_len(n1 - n2)
y.tr <- y[tr.idx]
y.te <- y[te.idx]
x.tr <- x[tr.idx, , drop = FALSE]
x.te <- x[te.idx, , drop = FALSE]
m <- nrow(x.tr)

# The data frames never change inside the loop, so build them once.
train.df <- data.frame(y = y.tr, x.tr)
test.df <- data.frame(x.te)

# Running ensemble predictions; plain vectors sized to their own data set.
fx.i <- numeric(length(y.tr))
fx.i.te <- numeric(length(y.te))
# Not read by this script; retained in case later code expects them.
h <- h.mean <- matrix(0, n2)
# Per-iteration error curves.
tr.er <- numeric(LValue)
te.er <- numeric(LValue)

# Deep, unpruned trees (cp = -1 disables the complexity-based stopping rule).
tree.ctrl <- rpart.control(minsplit = 10, maxdepth = 10, cp = -1)

for (i in seq_len(LValue)) {
  # Observation weights proportional to the current absolute residual.
  resid.tr <- y.tr - fx.i
  w.i <- abs(resid.tr)
  w.total <- sum(w.i)
  if (w.total == 0) {
    # Training fit is exact: normalising would give NaN weights and there is
    # nothing left to boost. Carry the current errors forward and stop.
    tr.er[i:LValue] <- 0
    te.er[i:LValue] <- sqrt(sum((fx.i.te - y.te)^2))
    break
  }
  w.i <- w.i / w.total

  # Fit a weighted regression tree. rpart() requires an explicit formula, and
  # `control` must be an rpart.control() object rather than a string.
  h.x <- rpart(y ~ ., data = train.df, weights = w.i, control = tree.ctrl)
  h.pred <- as.numeric(predict(h.x, train.df))

  # Least-squares coefficient of the residual on the tree's predictions.
  # Computed as a scalar: a 1x1 matrix cannot be multiplied elementwise with
  # a length-n vector in R.
  beta.hat <- sum(h.pred * resid.tr) / sum(h.pred^2)

  # Update the training fit and record the training error.
  fx.i <- fx.i + learningRate * beta.hat * h.pred
  tr.er[i] <- sqrt(sum((fx.i - y.tr)^2))

  # Apply the SAME tree and coefficient to the held-out data. Refitting the
  # coefficient against y.te would leak test labels into the reported error.
  h.pred.te <- as.numeric(predict(h.x, test.df))
  fx.i.te <- fx.i.te + learningRate * beta.hat * h.pred.te
  te.er[i] <- sqrt(sum((fx.i.te - y.te)^2))
}

#####
## Plot the error curves
#####
# Compute the common range first so both curves and the legend are in view.
g_range <- range(0, tr.er, te.er)
plot(te.er, type = "o", col = "red",
     xlab = "Iteration", ylab = "Root sum-squared error",
     ylim = g_range, xlim = c(0, LValue))
lines(tr.er, type = "o", col = "blue")
title(main = "test and train errors", col.main = "red", font.main = 16)
# Legend symbols match what is drawn: default point symbol, solid lines.
legend(1, g_range[2], c("train error", "test error"),
       cex = 0.8, col = c("blue", "red"), pch = 1, lty = 1)