#####################################################################
#
# R code for reproducing the experiments in 
# (Roos, Oulasvirta, "An Extended Framework for Measuring the
# Information Capacity of the Human Motor System)
#
# code by Teemu Roos, Feb 15, 2011
# teemu.roos (at) cs.helsinki.fi
#
# Data should be stored in directory 'aligneddata'
# Currently expects files 05_%d_ali_%d.txt to be the 20 
# sequences in category 'Subject #5: modern dance" from
# mocap.cs.cmu.edu /after alignment/. To align, we recommend
# Canonical Time Warping, from www.humansensing.cs.cmu.edu/
# projects/ctwCode.html .
#
#####################################################################


twopie = 2*pi*exp(1);

# return stochastic complexity of sequence r with side information v
evalSC <- function(v, r)
{
   n <- nrow(r);           # number of features
   totCL <- 0;             # total code-length over all features
   for (i in 1:ncol(r)) 
   {
	# extract regressors depending on the input size
        if (ncol(v) == ncol(r))
	{
	        Aplus <- qr(cbind(matrix(1,n,1), v[, i]));
		k = 3;
	}
        if (ncol(v) == 2 * ncol(r))
	{
		Aplus <- qr(cbind(matrix(1,n,1), v[, i], v[, ncol(r)+i]));
		k = 4;
	}
	if (ncol(v) == 3 * ncol(r))
	{
		# this one used in the paper
	     	Aplus <- qr(cbind(matrix(1,n,1), v[, i], v[, ncol(r)+i], 
		      	 		         v[, 2*ncol(r)+i]));
		k = 5;
	}
	if (ncol(v) == 4 * ncol(r))
	{
	     	Aplus <- qr(cbind(matrix(1,n,1), v[, i], v[, ncol(r)+i], 
		      	 		         v[, 2*ncol(r)+i],
		      	 		         v[, 3*ncol(r)+i]));
		k = 6;
	}

   	b <- qr.coef(Aplus, r[,i]);      # least squares fit
   	res <- qr.resid(Aplus, r[, i]);  # residuals

if (sum(res^2) < .01) print(c(i,sum(res^2)))  # warning about too low variance

	rss <- sum(res^2)

	# Rissanen's classic two-part MDL code-length (feel free
	# to replace by more advanced universal code).

   	totCL <- totCL + n/2*log(twopie*rss/n) + k/2*log(n);
   }
   return(totCL);
}

# complexity of a single (multivariate) sequence
SC <- function(a)
{
   n <- nrow(a)-2;
   if (n < 1) return(list(n=0,k=1,cl=0,rate=0));
   cl <- evalSC(cbind(a[1:n,],a[1:n+1,]), a[1:n+2,]);
   return(list(n=n,k=k,cl=cl,rate=cl/n));
}

# complexity of a (multivariate) sequence given another
SCcond <- function(a,b)
{
   n <- min(nrow(a), nrow(b))-2;
   cl <- evalSC(cbind(a[1:n,],a[1:n+1,],b[1:n+2,]), a[1:n+2,]);
   return(list(n=n,n,k=k,cl=cl,rate=cl/n));
}
   

its <- 20; # number of sequences

M <- matrix(0,its,its);
Mcl <- matrix(0,its,its);

Ka <- array(list(NULL),c(its,its));
Kab <- array(list(NULL),c(its,its));
for (i in 1:its)
   for (j in 1:its) 
   {
      if (i != j) {
      	    a <- read.table(sprintf("aligneddata/05_%d_ali_%d.txt", i, j));
	    b <- read.table(sprintf("aligneddata/05_%d_ali_%d.txt", j, i));

	    # eliminate features with too small variance
	    a$V1=NULL; a$V3=NULL; a$V25=NULL; a$V26=NULL;
	    b$V1=NULL; b$V3=NULL; b$V25=NULL; b$V26=NULL;
	    a$V34=NULL; a$V37=NULL; a$V38=NULL; a$V46=NULL;
	    b$V34=NULL; b$V37=NULL; b$V38=NULL; b$V46=NULL;

	    # remove rows duplicated in sequence 'a' due to alignment
	    skip <- matrix(FALSE, nrow(a));
	    skip <- rowSums((a[2:nrow(a),]-a[1:(nrow(a)-1),])^2) < 0.001;
	    a <- a[!skip,];
	    b <- b[!skip,];

	    # normalize features
	    a <- t(apply(a,1,'-',apply(a,2,mean)));
	    a <- t(apply(a,1,'/',sqrt(apply(a,2,var))));
	    a[is.nan(a)]<-0;
	    b <- t(apply(b,1,'-',apply(b,2,mean)));
	    b <- t(apply(b,1,'/',sqrt(apply(b,2,var))));
	    b[is.nan(b)]<-0;

	    lena <- nrow(a);
	    lenb <- nrow(b);

	    # compute stochastic complexity with and without condition
      	    Ka[[i,j]] <- SC(a);
	    Kab[[i,j]] <- SCcond(a,b);
	    
	    # rate times difference gives information capacity
	    # /log(2.0) to get bits instead of nats
     	    M[i,j] <- (Ka[[i,j]]$cl - Kab[[i,j]]$cl)/lena*120/log(2.0);
      	    Mcl[i,j] <- (Ka[[i,j]]$cl - Kab[[i,j]]$cl)/log(2.0);
      }
   }

options(digits=2)
print(M);

# that all folks!