# SNLS model selection compared to AIC, BIC, PLS, and NML # To go with the paper "Model Selection by Sequentially Normalized # Least Squares" by Rissanen, Roos, Myllymäki # # R implementation by T. Roos # November 17, 2009 # burn-in length for generating random sequences burnin = 200 # different sample sizes samplesizes <- c(50,100,200,400,800,1600) maxk <- 15 # estimate order within range 1,...,maxk reps=100 # number of repetitions (should be increased for realiable results) # initialize accuracy matrix snls_choice = matrix(0,maxk,length(samplesizes)) pls_choice = matrix(0,maxk,length(samplesizes)) bic_choice = matrix(0,maxk,length(samplesizes)) aic_choice = matrix(0,maxk,length(samplesizes)) nml_choice = matrix(0,maxk,length(samplesizes)) snls_err = array(0,dim=c(maxk,length(samplesizes),reps)) pls_err = array(0,dim=c(maxk,length(samplesizes),reps)) bic_err = array(0,dim=c(maxk,length(samplesizes),reps)) aic_err = array(0,dim=c(maxk,length(samplesizes),reps)) nml_err = array(0,dim=c(maxk,length(samplesizes),reps)) for (ni in 1:length(samplesizes)) { n <- samplesizes[ni] for (ktrue in c(1,2,3,4,5,6,7,8,9,10)) { # repeat many times for (iter in 1:reps) { # define model (uniformly random but stable) unstable <- TRUE while (unstable) { coeffs <- runif(ktrue,-1,1) unstable <- FALSE tryCatch({ arima.sim(n = 1, list(ar = coeffs)) }, error = function(ex) { unstable <<- TRUE }) } beta <- matrix(coeffs, 1, length(coeffs)) m <- maxk+1 # generate data x <- matrix(0,n+burnin+1,1) for (i in (ktrue+1):(n+burnin+1)) { x[i] <- x[(i-1):(i-ktrue)] %*% beta[1:ktrue] + rnorm(1); } # construct design matrix A=matrix(0,n,maxk) for (i in 1:n) { A[i,1:maxk] <- x[(burnin+i-1):(burnin+i-maxk)] } x <- x[(burnin+1):(burnin+n+1)] snls_k <- -1 nml_k <- 1 snlsm_res <- matrix(0,n,maxk) snlsm_c <- matrix(0,n,maxk) # prediction error for each model order prediction_error = matrix(0,maxk) for (k in 1:maxk) { # evaluate criteria nml_res <- matrix(0,n,1) pls_res <- matrix(0,n,1) # SNLS - recursive form V <- matrix(0,k,k) b <- matrix(0,k,1) for (t in 1:n) { if (t < k+1) { Aplus <- qr(matrix(A[1:t,1:k],t,k)) b <- qr.coef(Aplus, x[1:t]) res <- qr.resid(Aplus,x[1:t]) e <- res[t] c <- 0 } else { if (t == k+1) V <- solve(t(A[1:(t-1),1:k]) %*% A[1:(t-1),1:k]) c <- A[t,1:k] %*% V %*% A[t,1:k] V <- V - V %*% A[t,1:k] %*% A[t,1:k] %*% V / (1+c[1]) b <- b + V %*% A[t,1:k] * (x[t] - A[t,1:k] %*% b)[1] e <- x[t] - A[t,1:k] %*% b } if (t >= m && t < n) pls_res[t+1] <- x[t+1] - A[(t+1),1:k] %*% b if (t > 1) { snlsm_res[t, k] <- e snlsm_c[t, k] <- c } } snls_crit <- (n-m)/2 * log(sum(snlsm_res[(m+1):n,k]^2)) + sum(log(snlsm_c[(m+1):n,k]+1)) if (k == 1 || is.nan(snls_mincrit) || (!is.nan(snls_crit) && snls_crit < snls_mincrit)) { snls_k <- k snls_mincrit <- snls_crit } # NML (same term used also in AIC, BIC) Aplus <- qr(A[1:n,1:k]) b <- qr.coef(Aplus, x[1:n]) nml_res <- qr.resid(Aplus,x[1:n]) Rhat <- b %*% t(A[1:n,1:k]) %*% A[1:n,1:k] %*% b / n nml_crit <- (n-k)/2 * log(sum(nml_res^2)/n/(n-k)) + k/2 * log(Rhat/k) + .5 * log(k*(n-k)) if (k == 1 || nml_crit < nml_mincrit) { nml_k <- k nml_mincrit <- nml_crit } pls_crit <- sum(pls_res^2) if (k == 1 || pls_crit < pls_mincrit) { pls_k <- k pls_mincrit <- pls_crit } bic_crit <- n/2 * log(sum(nml_res^2)/n) + (k+1)/2 * log(n) if (k == 1 || bic_crit < bic_mincrit) { bic_k <- k bic_mincrit <- bic_crit } aic_crit <- n/2 * log(sum(nml_res^2)/n) + (k+1) if (k == 1 || aic_crit < aic_mincrit) { aic_k <- k aic_mincrit <- aic_crit } test_i <- n+1 prediction_error[k] <- (x[test_i] - x[(test_i-1):(test_i-k)] %*% b)^2 } if (snls_k == ktrue) snls_choice[snls_k,ni] <- snls_choice[snls_k,ni] + 1 snls_err[ktrue,ni,iter] <- prediction_error[snls_k] if (nml_k == ktrue) nml_choice[nml_k,ni] <- nml_choice[nml_k,ni] + 1 nml_err[ktrue,ni,iter] <- prediction_error[nml_k] if (pls_k == ktrue) pls_choice[pls_k,ni] <- pls_choice[pls_k,ni] + 1 pls_err[ktrue,ni,iter] <- prediction_error[pls_k] if (bic_k == ktrue) bic_choice[bic_k,ni] <- bic_choice[bic_k,ni] + 1 bic_err[ktrue,ni,iter] <- prediction_error[bic_k] if (aic_k == ktrue) aic_choice[aic_k,ni] <- aic_choice[aic_k,ni] + 1 aic_err[ktrue,ni,iter] <- prediction_error[aic_k] } print(n,ktrue) } } # do graphics; two files for k*=1,...,5 and k*=6,...,10, respectively. postscript("snls3a.eps",horizontal=FALSE,onefile=FALSE,height=12,width=7.4,pointsize=10) par(mfrow=c(5,2)) par(cex=.86) par(mai=c(.4,.52,.25,.05)) par(lwd=1.3) for (k in 1:5) { par(new=FALSE); klab=paste("k* =",k) # barplot for model order selection (percentage of correct choices) barplot(t(matrix(c(aic_choice[k,1:6]/reps,bic_choice[k,1:6]/reps,pls_choice[k,1:6]/reps,nml_choice[k,1:6]/reps,snls_choice[k,1:6]/reps),length(aic_choice[k,1:6]),5)), t='l', col=c('red','blue','green','gray89','darkgray'), ylim=c(0,1),ylab="",xlab="",yaxt="n",beside=TRUE,names.arg=samplesizes[1:6]) axis(side=2,las=1,at=0:4*.25,labels=c("0%","25%","50%","75%","100%")) # boxplot for predictive accuracy (with mean added as a triangle (pch=24)) par(new=FALSE); boxplot(aic_err[k,1,],aic_err[k,2,],aic_err[k,3,],aic_err[k,4,],aic_err[k,5,],aic_err[k,6,],names=c("","","","","",""),at=1:6-0.3,xlim=c(0.65,6.35),boxwex=.15,col=c('red','red','red','red','red','red'),ylim=c(0,1.7)) points(y=c(mean(aic_err[k,1,]),mean(aic_err[k,2,]),mean(aic_err[k,3,]),mean(aic_err[k,4,]),mean(aic_err[k,5,]),mean(aic_err[k,6,])),x=1:6-0.3,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(bic_err[k,1,],bic_err[k,2,],bic_err[k,3,],bic_err[k,4,],bic_err[k,5,],bic_err[k,6,],names=c("","","","","",""),at=1:6-0.15,xlim=c(0.65,6.35),boxwex=.15,col=c('blue','blue','blue','blue','blue','blue'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(bic_err[k,1,]),mean(bic_err[k,2,]),mean(bic_err[k,3,]),mean(bic_err[k,4,]),mean(bic_err[k,5,]),mean(bic_err[k,6,])),x=1:6-0.15,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(pls_err[k,1,],pls_err[k,2,],pls_err[k,3,],pls_err[k,4,],pls_err[k,5,],pls_err[k,6,],names=samplesizes,at=1:6,xlim=c(0.65,6.35),boxwex=.15,col=c('green','green','green','green','green','green'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(pls_err[k,1,]),mean(pls_err[k,2,]),mean(pls_err[k,3,]),mean(pls_err[k,4,]),mean(pls_err[k,5,]),mean(pls_err[k,6,])),x=1:6,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(nml_err[k,1,],nml_err[k,2,],nml_err[k,3,],nml_err[k,4,],nml_err[k,5,],nml_err[k,6,],names=c("","","","","",""),at=1:6+.15,xlim=c(0.65,6.35),boxwex=.15,col=c('gray89','gray89','gray89','gray89','gray89','gray89'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(nml_err[k,1,]),mean(nml_err[k,2,]),mean(nml_err[k,3,]),mean(nml_err[k,4,]),mean(nml_err[k,5,]),mean(nml_err[k,6,])),x=1:6+.15,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(snls_err[k,1,],snls_err[k,2,],snls_err[k,3,],snls_err[k,4,],snls_err[k,5,],snls_err[k,6,],names=c("","","","","",""),at=1:6+.3,xlim=c(0.35,6.35),boxwex=.15,col=c('darkgray','darkgray','darkgray','darkgray','darkgray','darkgray'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(snls_err[k,1,]),mean(snls_err[k,2,]),mean(snls_err[k,3,]),mean(snls_err[k,4,]),mean(snls_err[k,5,]),mean(snls_err[k,6,])),x=1:6+.3,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) mtext(klab,at=c(0),padj=-1) } dev.off() postscript("snls3b.eps",horizontal=FALSE,onefile=FALSE,height=12,width=7.4,pointsize=10) par(mfrow=c(5,2)) par(cex=.86) par(mai=c(.4,.52,.25,.05)) par(lwd=1.3) for (k in 6:10) { par(new=FALSE); klab=paste("k* =",k) barplot(t(matrix(c(aic_choice[k,1:6]/reps,bic_choice[k,1:6]/reps,pls_choice[k,1:6]/reps,nml_choice[k,1:6]/reps,snls_choice[k,1:6]/reps),length(aic_choice[k,1:6]),5)), t='l', col=c('red','blue','green','gray89','darkgray'), ylim=c(0,1),ylab="",xlab="",yaxt="n",beside=TRUE,names.arg=samplesizes[1:6]) axis(side=2,las=1,at=0:4*.25,labels=c("0%","25%","50%","75%","100%")) par(new=FALSE); boxplot(aic_err[k,1,],aic_err[k,2,],aic_err[k,3,],aic_err[k,4,],aic_err[k,5,],aic_err[k,6,],names=c("","","","","",""),at=1:6-0.3,xlim=c(0.65,6.35),boxwex=.15,col=c('red','red','red','red','red','red'),ylim=c(0,1.7)) points(y=c(mean(aic_err[k,1,]),mean(aic_err[k,2,]),mean(aic_err[k,3,]),mean(aic_err[k,4,]),mean(aic_err[k,5,]),mean(aic_err[k,6,])),x=1:6-0.3,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(bic_err[k,1,],bic_err[k,2,],bic_err[k,3,],bic_err[k,4,],bic_err[k,5,],bic_err[k,6,],names=c("","","","","",""),at=1:6-0.15,xlim=c(0.65,6.35),boxwex=.15,col=c('blue','blue','blue','blue','blue','blue'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(bic_err[k,1,]),mean(bic_err[k,2,]),mean(bic_err[k,3,]),mean(bic_err[k,4,]),mean(bic_err[k,5,]),mean(bic_err[k,6,])),x=1:6-0.15,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(pls_err[k,1,],pls_err[k,2,],pls_err[k,3,],pls_err[k,4,],pls_err[k,5,],pls_err[k,6,],names=samplesizes,at=1:6,xlim=c(0.65,6.35),boxwex=.15,col=c('green','green','green','green','green','green'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(pls_err[k,1,]),mean(pls_err[k,2,]),mean(pls_err[k,3,]),mean(pls_err[k,4,]),mean(pls_err[k,5,]),mean(pls_err[k,6,])),x=1:6,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(nml_err[k,1,],nml_err[k,2,],nml_err[k,3,],nml_err[k,4,],nml_err[k,5,],nml_err[k,6,],names=c("","","","","",""),at=1:6+.15,xlim=c(0.65,6.35),boxwex=.15,col=c('gray89','gray89','gray89','gray89','gray89','gray89'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(nml_err[k,1,]),mean(nml_err[k,2,]),mean(nml_err[k,3,]),mean(nml_err[k,4,]),mean(nml_err[k,5,]),mean(nml_err[k,6,])),x=1:6+.15,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) boxplot(snls_err[k,1,],snls_err[k,2,],snls_err[k,3,],snls_err[k,4,],snls_err[k,5,],snls_err[k,6,],names=c("","","","","",""),at=1:6+.3,xlim=c(0.35,6.35),boxwex=.15,col=c('darkgray','darkgray','darkgray','darkgray','darkgray','darkgray'),ylim=c(0,1.7),add=TRUE) points(y=c(mean(snls_err[k,1,]),mean(snls_err[k,2,]),mean(snls_err[k,3,]),mean(snls_err[k,4,]),mean(snls_err[k,5,]),mean(snls_err[k,6,])),x=1:6+.3,xlim=c(0.65,6.35),pch=24,bg='black',ylim=c(0,1.7)) mtext(klab,at=c(0),padj=-1) } dev.off()