#####################################################################################
# Code for: Jeff Gill and George Casella. Nonparametric Priors For Ordinal          #
# Bayesian Social Science Models: Specification and Estimation. Journal of the      #
# American Statistical Association, Forthcoming JASA, 2009.                         #
#                     		       Copyright 2008, Jeff Gill and George Casella #
#####################################################################################

# SETUP UP DATA 

library(MASS); library(msm); library(bayesm)

pa.large.df <- read.table("http://jgill.wustl.edu/data/pa.large.dat",header=TRUE)
pa.large.df$stress<- factor(pa.large.df$stress)
       levels(pa.large.df$stress) <- c("None","Little","Some","Significant","Extreme")
       pa.large.df$stress <- factor(as.ordered(pa.large.df$stress))
n <- nrow(pa.large.df)

# FIX PRIOR DISTRIBUTIONS AND PRIOR PARAMETERS, RUN SAMPLER

bugs.beta <- c(-0.1,0.15,0.11,-0.24,-0.38,0.60,-0.23,0.15,-0.04,-0.23,0.17,-0.47)
bugs.theta <- c(-0.99,-0.44,0.27,1.82)
beta <- matrix(bugs.beta,nrow=1) 			
theta <- matrix(c(-Inf,bugs.theta,Inf),nrow=1)
C.num <- ncol(theta) 
C.vec <- sample(1:n,n,replace=TRUE)
Y <- matrix(rep(0,n*(C.num-1)),ncol=(C.num-1)); for (i in 1:nrow(Y))  Y[i,pa.large.df[i,1]] <- 1
X <- as.matrix(pa.large.df[,2:12]); X <- cbind(rep(1,nrow(X)),X)
m <- 1000; sigma <- 1; a <- 1; b <- 1; d <- 10; e <- 0
f.y.psi <- C.prob <- nil <- H <- u <- rep(0,n)
psi <- rnorm(n,0,0.1)
sigma.beta.sq <- 5; mu.beta <- 0 			
A <- ( sigma.beta.sq*t(X)%*%X + diag(sigma^2,nrow=ncol(beta)) )/( sigma^2 * sigma.beta.sq )
num.reps <- 50000
C.mat <- matrix(0,nrow=n,ncol=n)
C.summary <- matrix(NA,ncol=2,nrow=num.reps)
psi.summary <- matrix(NA,ncol=length(bugs.beta),nrow=num.reps)
m.val.vec <- seq(1:200); max.m <- 200; m.prob.vec <- rep(0,length(m.val.vec))
source("polya2-jasa.s")

# SUMMARIZING THE RESULTS

start <- 20001; stop <- 50000; M.star <- stop-start
betafinal   <-  beta[start:stop,]
thetafinal  <- theta[start:stop,-c(1,6)]
betasort    <- apply(betafinal,2,sort)
thetasort   <- apply(thetafinal,2,sort)
ci.95       <- cbind( betasort[M.star*0.025,], betasort[M.start*.0.975,] )[-1,]
ti.95       <- cbind( thetasort[M.star*0.025,], thetasort[M.start*.0.975,] )
mcmc.summary <- rbind( 	cbind( apply(betafinal,2,mean)[-1], ci.95 ),
    			cbind( apply(thetafinal,2,mean),    ti.95 ) ) 
mcmc.summary[13:16,1] <- mcmc.summary[13:16,1] + mcmc.summary[1,1]
dimnames(mcmc.summary) <- list(c(dimnames(pa.large.df)[[2]][-1], "theta1","theta2","theta3","theta4"),NULL)
round(mcmc.summary,3)

x.mean <- apply(pa.large.df[,-1],2,mean)
x.mean <- c(x.mean[1:3],x.mean[6:10],x.mean[4:5],x.mean[11])
coef1 <- summary(pa.plo)$coef[1:11,1]
coef2 <- apply(betafinal,2,mean)[-1]
coef2 <- c(coef2[1:3],coef2[6:10],coef2[4:5],coef2[11])
scale1 <- summary(pa.plo)$coef[12:15,1]
scale2 <- apply(thetafinal,2,mean) + mean(betafinal[,1])
se1 <- summary(pa.plo)$coef[1:11,2]
se2 <- sqrt(apply(betafinal,2,var))[-1]
se2 <- c(se2[1:3],se2[6:10],se2[4:5],se2[11])
ci1 <- matrix(c( coef1 - qnorm(0.99)*se1, coef1 + qnorm(0.99)*se1),byrow=FALSE,ncol=2)
betasort <- apply(betafinal,2,sort)
ci2 <- cbind( betasort[750,], betasort[29250,] )[-1,]
d1 <- 1/diff(scale1)
d2 <- 1/diff(scale2)     
reversion <- c(0,1,1,1,1,1,1,1,0,1,0)   
BX1 <- coef1   * x.mean - coef1 * reversion;   
BX2 <- coef2   * x.mean - coef2 * reversion;   
BL1 <- ci1[,1] * x.mean - coef1 * reversion;  
BH1 <- ci1[,2] * x.mean - coef2 * reversion;   
BL2 <- ci2[,1] * x.mean - coef1 * reversion;   
BH2 <- ci2[,2] * x.mean - coef2 * reversion;   
model.compare <- cbind(
    round( cbind( BL1 %o% mean(d1), BH1 %o% mean(d1) ),3 ),
    round( cbind( BL2 %o% mean(d2), BH1 %o% mean(d2) ),3 )  )
dimnames(model.compare)[[2]] <- list("hpd.lo","hpd.hi","hpd.lo","hpd.hi")
model.compare

# GRAPHS

par(mfrow=c(1,2))
plot.ts(apply(beta, 2,cumsum)/c(1:nrow(beta)),plot.type="single",lty=1:5,ylab="Issue Space",
	xlab="Iterations")
mtext(outer=F,cex=1.3,"Beta Cumulative Sum Plot",line=1.3)
plot.ts(apply(theta, 2,cumsum)/c(1:nrow(theta)),plot.type="single",lty=1:5,ylab="Issue Space",
	xlab="Iterations")
mtext(outer=F,cex=1.3,"Theta Cumulative Sum Plot",line=1.3)

par(mfrow=c(1,1),cex.lab=2.0,cex.main=2.0,mar=c(6,6,5,2))
par(mfrow=c(1,1),cex.axis=1.5)
hist(C.vec,breaks=length(C.vec),main="C Vector Assignment",xlab="Tables",col="orange")

par(mfrow=c(1,1),cex.lab=2.0,cex.main=2.0,mar=c(6,6,5,2))
plot.ts(apply(beta[,-1], 2,cumsum)/c(1:nrow(beta[,-1])),plot.type="single",lty=1:5,ylab="Issue Space",
	xlab="Iterations",col="purple",lwd=2)
mtext(outer=F,cex=1.3,"Beta Cumulative Sum Plot",line=1.3)

par(mfrow=c(1,2),cex.axis=1,cex.lab=1.5,mar=c(6,4,1,1))
hist(C.summary[30001:50000,1],xlab="Number of Occupied Tables",main="",ylab="",col="lightgrey")
hist(C.summary[30001:50000,2],xlab="Average Number at Tables",main="",ylab="",col="lightgrey")

# NECESSARY FUNCTIONS                  

rmultinorm <- function(num.vals, mu.vec, vcmat, tol = 1e-08)  {
    k <- ncol(vcmat)
    if(length(mu.vec)!=k)
        stop(paste("rmultinorm error: rmultnorm: mu.vec vector wrong length:",length(mu.vec)))
    if(max(abs(vcmat - t(vcmat))) > tol)
        stop("rmultinorm error: variance-covariance matrix not symmetric")
    vs <- svd(vcmat)
    vcsqrt <- t(vs$v %*% (t(vs$u) * sqrt(vs$d)))
    ans.mat <- sweep(matrix(rnorm(num.vals*k), nrow = num.vals) %*% vcsqrt,2,mu.vec,"+")
    dimnames(ans.mat) <- list(NULL, dimnames(vcmat)[[2]])
    return(ans.mat)
}

tstnorm <- function(n,mean=0,sd=1,lower=-Inf,upper=Inf)  
{   accept <- FALSE
    if (is.finite(lower) == FALSE) lower <- min(mean-20*sd,upper-20*sd)
    if (is.finite(upper) == FALSE) upper <- max(mean+20*sd,lower-20*sd)
    while (accept == FALSE)  	
    {   z <- runif(1,lower,upper)
	if ((lower <= 0) & (0 <= upper)) g <- exp(-(z-mean)^2/(2*sd))
	if (upper <= 0)                  g <- exp((upper^2 - (z-mean)^2)/(2*sd))
	if (0 <= lower)                  g <- exp((lower^2 - (z-mean)^2)/(2*sd))
        print(paste("mean=",mean,"sd=",sd,"lower=",lower,"upper=",upper,"z=",z,"g=",g,"M=",M))
	u <- runif(1,0,1)
	if (is.na(g)) print(paste("g is na, z, mean, sd",z,mean,sd))
	if (u <= g) accept <- TRUE
    }
    return(z)
}

# TEST:  tstnorm(1,2.643044,1,-Inf,9.17)


