##
##  Example R code for the analysis of the Egyptian skull data
##
##


skulls = read.table("skulls_data.txt",header=T)
names(skulls)
attach(skulls)

Skulls = as.data.frame(skulls)
names(Skulls)=c("MB","BH","BL","NH","Year")
Skulls$Year = as.factor(Skulls$Year)

pairs(skulls[Year==-4000,1:4])
pairs(skulls[Year==150,1:4])


data = skulls

y1 <- data[ ,1]
y2 <- data[ ,2]
y3 <- data[ ,3]
y4 <- data[ ,4]
year <- data[ ,5]
tempus <- c(-4000,-3300,-1850,-200,150)

data1 <- data[  1:30,1:4]
data2 <- data[ 31:60,1:4]
data3 <- data[ 61:90,1:4]
data4 <- data[ 91:120,1:4]
data5 <- data[121:150,1:4]

xi1hat = apply(data1,2,mean)
xi2hat = apply(data2,2,mean)
xi3hat = apply(data3,2,mean)
xi4hat = apply(data4,2,mean)
xi5hat = apply(data5,2,mean)

# all mean estimates, 5 x 4 matrix, 5 eras, 4 skull axes:
xihats <- rbind(xi1hat, xi2hat, xi3hat, xi4hat, xi5hat)

S1 <- var(data1)
S2 <- var(data2)
S3 <- var(data3)
S4 <- var(data4)
S5 <- var(data5)
SS <- (S1 + S2 + S3 + S4 + S5)/5

sigmahats <- sqrt(diag(SS))

par(mfrow=c(2,2))

## plotting the 1st skull axis:
# five confidence intervals:
low1 <- xihats[ ,1] - 1.645*sigmahats[1]/sqrt(30)
up1 <- xihats[ ,1] + 1.645*sigmahats[1]/sqrt(30)

give1 <- seq(low1[1], up1[1], length=100)
give2 <- seq(low1[2], up1[2], length=100)
give3 <- seq(low1[3], up1[3], length=100)
give4 <- seq(low1[4], up1[4], length=100)
give5 <- seq(low1[5], up1[5], length=100)

plot(tempus, c(xi1hat[1], xi2hat[1], xi3hat[1], xi4hat[1], xi5hat[1]),
  ylim=c(129,139), xlab="year", ylab="Maximum breath of skull")
lines(tempus[1] + 0*give1, give1)
lines(tempus[2] + 0*give2, give2)
lines(tempus[3] + 0*give3, give3)
lines(tempus[4] + 0*give4, give4)
lines(tempus[5] + 0*give5, give5)

## plotting the 2nd skull axis:
# five confidence intervals:
low2 <- xihats[ ,2] - 1.645*sigmahats[2]/sqrt(30)
up2 <- xihats[ ,2] + 1.645*sigmahats[2]/sqrt(30)

give1 <- seq(low2[1], up2[1], length=100)
give2 <- seq(low2[2], up2[2], length=100)
give3 <- seq(low2[3], up2[3], length=100)
give4 <- seq(low2[4], up2[4], length=100)
give5 <- seq(low2[5], up2[5], length=100)

plot(tempus, c(xi1hat[2], xi2hat[2], xi3hat[2], xi4hat[2], xi5hat[2]),
  ylim=c(128,136), xlab="year", ylab="Basibregmatic height of skull")
lines(tempus[1] + 0*give1, give1)
lines(tempus[2] + 0*give2, give2)
lines(tempus[3] + 0*give3, give3)
lines(tempus[4] + 0*give4, give4)
lines(tempus[5] + 0*give5, give5)

## plotting the 3rd skull axis:
# five confidence intervals:
low3 <- xihats[ ,3] - 1.645*sigmahats[3]/sqrt(30)
up3 <- xihats[ ,3] + 1.645*sigmahats[3]/sqrt(30)

give1 <- seq(low3[1], up3[1], length=100)
give2 <- seq(low3[2], up3[2], length=100)
give3 <- seq(low3[3], up3[3], length=100)
give4 <- seq(low3[4], up3[4], length=100)
give5 <- seq(low3[5], up3[5], length=100)

plot(tempus, c(xi1hat[3], xi2hat[3], xi3hat[3], xi4hat[3], xi5hat[3]),
  ylim=c(92,101), xlab="year", ylab="Basialveolar length of skull")
lines(tempus[1] + 0*give1, give1)
lines(tempus[2] + 0*give2, give2)
lines(tempus[3] + 0*give3, give3)
lines(tempus[4] + 0*give4, give4)
lines(tempus[5] + 0*give5, give5)

## plotting the 4th skull axis:
# five confidence intervals:
low4 <- xihats[ ,4] - 1.645*sigmahats[4]/sqrt(30)
up4 <- xihats[ ,4] + 1.645*sigmahats[4]/sqrt(30)

give1 <- seq(low4[1], up4[1], length=100)
give2 <- seq(low4[2], up4[2], length=100)
give3 <- seq(low4[3], up4[3], length=100)
give4 <- seq(low4[4], up4[4], length=100)
give5 <- seq(low4[5], up4[5], length=100)

plot(tempus, c(xi1hat[4], xi2hat[4], xi3hat[4], xi4hat[4], xi5hat[4]),
  ylim=c(49,53), xlab="year", ylab="Nasal height of skull")
lines(tempus[1] + 0*give1, give1)
lines(tempus[2] + 0*give2, give2)
lines(tempus[3] + 0*give3, give3)
lines(tempus[4] + 0*give4, give4)
lines(tempus[5] + 0*give5, give5)


# MODEL FITTING


n1=n2=n3=n4=n5=30
nn <- n1 + n2 + n3 + n4 + n5


S1 <- var(data1)*(n1-1)/n1
S2 <- var(data2)*(n2-1)/n2
S3 <- var(data3)*(n3-1)/n3
S4 <- var(data4)*(n4-1)/n4
S5 <- var(data5)*(n5-1)/n5
SS <- (S1 + S2 + S3 + S4 + S5)/5

sigmahats <- sqrt(diag(SS))


det <- function(A)
{prod(eigen(A)$values)}

### model 0:
# five mean vectors and five covariance matrices.
# likelihood = product of the 5 separate likelihoods

aic0 = (-n1*log(det(S1)) - nn*4 - 2*(20 + 5*10) - nn*4*log(2*pi)
-n2*log(det(S2))-n3*log(det(S3))-n4*log(det(S4))-n5*log(det(S5)))

# aic0 = -3512.960

bic0 = (-n1*log(det(S1)) - nn*4 - log(nn)*(20 + 5*10) - nn*4*log(2*pi)
-n2*log(det(S2))-n3*log(det(S3))-n4*log(det(S4))-n5*log(det(S5)))

#  bic0 = -3723.704



### model 1:
# classic, five mean vectors with 20 pms, same Sigma with 10 pms:
# then Sigmahat = SS, and:

aic1 <- -nn*log(det(SS)) - nn*4 - 2*(20 + 10) - nn*4*log(2*pi)

# aic1 = -3483.18

bic1 <- -nn*log(det(SS)) - nn*4 - log(nn)*(20 + 10) - nn*4*log(2*pi)

# bic1 = -3573.499
 

### model 2:
# also classic, same mean with 4 pms, same Sigma with 10 pms:
# then Sigmahat = SS + sum( n_i/n * (bar y_i - bar y)(bar y_i - bar y)' ), and:

xibar <- c(mean(xihats[ ,1]), mean(xihats[ ,2]),
   mean(xihats[ ,3]), mean(xihats[ ,4]))
d1 <- xihats[1, ] - xibar
d2 <- xihats[2, ] - xibar
d3 <- xihats[3, ] - xibar
d4 <- xihats[4, ] - xibar
d5 <- xihats[5, ] - xibar

extra <- d1%*%t(d1) + d2%*%t(d2) + d3%*%t(d3) + d4%*%t(d4) + d5%*%t(d5) 
Sigmahat <- SS + (1/5)*extra

aic2 <- -nn*log(det(Sigmahat)) - nn*4 - 2*(4 + 10) - nn*4*log(2*pi)

# aic2 = -3512.695

bic2 <- -nn*log(det(Sigmahat)) - nn*4 - log(nn)*(4 + 10) - nn*4*log(2*pi)
# bic2 = -3554.844
 
### model 3:
# non-standard, linear trends with 2*4 pms, same Sigma with 10 pms:

timef <- tempus/1000

criterion <- function(pms)
{
a1 <- pms[1]
b1 <- pms[2]
a2 <- pms[3]
b2 <- pms[4]
a3 <- pms[5]
b3 <- pms[6]
a4 <- pms[7]
b4 <- pms[8]
# era 1:
hei1 <- a1 + b1*(timef[1]-timef[1])
hei2 <- a2 + b2*(timef[1]-timef[1])
hei3 <- a3 + b3*(timef[1]-timef[1])
hei4 <- a4 + b4*(timef[1]-timef[1])
d1 <- xi1hat - c(hei1,hei2,hei3,hei4)
# era 2:
hei1 <- a1 + b1*(timef[2]-timef[1])
hei2 <- a2 + b2*(timef[2]-timef[1])
hei3 <- a3 + b3*(timef[2]-timef[1])
hei4 <- a4 + b4*(timef[2]-timef[1])
d2 <- xi2hat - c(hei1,hei2,hei3,hei4)
# era 3:
hei1 <- a1 + b1*(timef[3]-timef[1])
hei2 <- a2 + b2*(timef[3]-timef[1])
hei3 <- a3 + b3*(timef[3]-timef[1])
hei4 <- a4 + b4*(timef[3]-timef[1])
d3 <- xi3hat - c(hei1,hei2,hei3,hei4)
# era 4:
hei1 <- a1 + b1*(timef[4]-timef[1])
hei2 <- a2 + b2*(timef[4]-timef[1])
hei3 <- a3 + b3*(timef[4]-timef[1])
hei4 <- a4 + b4*(timef[4]-timef[1])
d4 <- xi4hat - c(hei1,hei2,hei3,hei4)
# era 5:
hei1 <- a1 + b1*(timef[5]-timef[1])
hei2 <- a2 + b2*(timef[5]-timef[1])
hei3 <- a3 + b3*(timef[5]-timef[1])
hei4 <- a4 + b4*(tempusfugit[5]-tempusfugit[1])
d5 <- xi5hat - c(hei1,hei2,hei3,hei4)
#
extra <- d1%*%t(d1) + d2%*%t(d2) + d3%*%t(d3) + d4%*%t(d4) + d5%*%t(d5) 
Sigmahat <- SS + (1/5)*extra
log( det(Sigmahat) )
}

xibar <- c(mean(xihats[ ,1]), mean(xihats[ ,2]),
   mean(xihats[ ,3]), mean(xihats[ ,4]))

starthere <- c(xibar[1],0, xibar[2],0, xibar[3],0, xibar[4],0)
# get to work, nlm, in 26 iterations, 8 parameters:
nlm(criterion, starthere)

# computing the estimate:
well <- nlm(criterion, starthere)$estimate


aic3 <- -nn*criterion(well) - nn*4 - 2*(8 + 10) - nn*4*log(2*pi)
#  aic3 = -3468.115  Linear trend, Sigma unstructured (same sigma)

bic3 <- -nn*criterion(well) - nn*4 - log(nn)*(8 + 10) - nn*4*log(2*pi)
# bic3 = -3522.306

x1 <- MB
x2 <- BH
x3 <- BL
x4 <- NH

Y = cbind(x1,x2,x3,x4)

multinormal.loglikelihood = function(Y,Z,Sigma,beta)
{
  # Y = Z beta + eps, 
  # where dim(Y)=n x m,dim(Z)=n x p, dim(beta)=p x m, dim(eps)=n x m

  n = nrow(Y)
  m = ncol(Y)
  beta.hat = matrix(solve(t(Z)%*%Z) %*% t(Z) %*% Y, ncol=m)
  eps.hat = Y - Z %*% beta.hat
  Sigmainv = solve(Sigma)

  log.likelihood = ( - 0.5*m*n*log(2*pi) - 0.5*n*log(det(Sigma))
            -0.5*sum(diag( Sigmainv %*% t(eps.hat) %*% eps.hat ))
   -0.5*sum(diag( Z %*% (beta.hat-beta)%*% Sigmainv %*% t(beta.hat-beta)%*%t(Z) )) )	
  return(log.likelihood)	
}


Model4corr = function(Y,Year,startvalues)
{    # Sigma equicorrelation, different variances

     xvec = startvalues # s1^2,...s4^2,corr, betas

     d = ncol(Y)
     nn = nrow(Y)

     betastart = matrix(xvec[6:(6+2*d-1)],nrow=2, byrow=T)
     Designmat = cbind(rep(1,nn),Year)
     rho = xvec[5]
     sig = sqrt(xvec[1:4])

	Sigmat <- matrix(rho,nrow=d,ncol=d)
	diag(Sigmat) <- xvec[1:4]
	Sigmat[1,2] <- Sigmat[2,1] <- rho*sig[1]*sig[2]
	Sigmat[1,3] <- Sigmat[3,1] <- rho*sig[1]*sig[3]
	Sigmat[1,4] <- Sigmat[4,1] <- rho*sig[1]*sig[4]
	Sigmat[2,3] <- Sigmat[3,2] <- rho*sig[2]*sig[3]
	Sigmat[2,4] <- Sigmat[4,2] <- rho*sig[2]*sig[4]
	Sigmat[3,4] <- Sigmat[4,3] <- rho*sig[3]*sig[4]

     logL.M4corr = multinormal.loglikelihood(Y,Designmat,Sigmat,betastart)
     return(-logL.M4corr)
}

start.M4corr = c(20.48, 23.43, 23.52, 9.92, 0.2, apply(Y,2,mean),rep(0,4))
max.M4corr = nlm(f=Model4corr,p=start.M4corr,Y=Y,Year=Year/1000,hessian=T,iterlim=1000)
aic4corr = -2*max.M4corr$minimum-2*(8+5)
aic4corr

# aic4corr =  -3464.649


bic4corr = -2*max.M4corr$minimum-log(nn)*(8+5)
bic4corr

# bic4corr = -3503.787



Model4corrsamevar = function(Y,Year,startvalues)
{    # Sigma equicorrelation, same variances

     xvec = startvalues # s^2,corr, betas

     d = ncol(Y)
     nn = nrow(Y)

     betastart = matrix(xvec[3:(3+2*d-1)],nrow=2, byrow=T)
     Designmat = cbind(rep(1,nn),Year)
     rho = xvec[2]
     sig = xvec[1]

	Sigmat <- matrix(rho*sig,nrow=d,ncol=d)
	diag(Sigmat) <- rep(sig,4)

     logL.M4corrv = multinormal.loglikelihood(Y,Designmat,Sigmat,betastart)
     return(-logL.M4corrv)
}

start.M4corrv = c(mean(c(20.48, 23.43, 23.52, 9.92)), 0.2, apply(Y,2,mean),rep(0,4))
max.M4corrv = nlm(f=Model4corrsamevar,p=start.M4corrv,Y=Y,Year=Year/1000,hessian=T,iterlim=1000)
aic4corrv = -2*max.M4corrv$minimum-2*(8+2)
aic4corrv


bic4corrv = -2*max.M4corrv$minimum-log(nn)*(8+2)
bic4corrv

max.M4corrv

Model5 = function(Y,Year,startvalues)
{    # Sigma equicorrelation, same variance

     xvec = startvalues # s^2,covv, betas

	d = ncol(Y)
     nn = nrow(Y)

     betastart = matrix(xvec[3:(3+2*d-1)],nrow=2, byrow=T)
	Designmat = cbind(rep(1,nn),Year)

	Sigmat <- matrix(xvec[2],nrow=d,ncol=d)
     diag(Sigmat) = rep(xvec[1],4)

	logL.M5 = multinormal.loglikelihood(Y,Designmat,Sigmat,betastart)
     return(-logL.M5)
}

start.M5 = c(mean(c(20.48, 23.43, 23.52, 9.92)), 1, apply(Y,2,mean),rep(0,4))
max.M5 = nlm(f=Model5,p=start.M5,Y=Y,Year=Year/1000,hessian=T,iterlim=1000)

aic5 = -2*max.M5$minimum-2*(8+2)
# aic5 = -3493.05

bic5 = -2*max.M5$minimum-log(nn)*(8+2)