# ZINB with random effect
# last modified on 15/12/2000; 5/11/2002
#
# Original Splus code provided by Drs. Andy Lee and Kelvin Yau
#
# Modifications for R by Dave Atkins
#
# For relevant papers, see Drs. Lee and Yau websites:
#
# http://fbstaff.cityu.edu.hk/mskyau/
# http://www.publichealth.curtin.edu.au/html/about_staffprofile.cfm?ID=482
#
## ---------------------------------------
#  MLE of scale parameter alpha of NB regression with weights w
# ---------------------------------------

agetk.ml <- function(y, mu, w) # calculate the k
{
loglik <- function(th,y,mu,w)
{
u <- exp(th)/(exp(th)+mu)
(sum(w*(log(gamma(y+exp(th))/gamma(y+1)/gamma(exp(th)))+exp(th)*log(u)+log((1-u)^y))))
}
objm <- optimize(loglik,lower =-8, upper =5, y=y,mu=mu,w=w,maximum=T)
res <- objm$maximum
1/exp(res)
}

## ---------------------------------------

#  GLMM of Poisson regression with weights mzk

# ---------------------------------------

wreml.poi <- function(y, mzk, x, z, beta1, va1, sig2, fam="Poisson", epsilon=1e-3)
{
M <- ncol(z);n <- length(y)
X <- cbind(1,x);p1 <- ncol(X)
zero1 <- matrix(0,ncol=p1,nrow=M)	
X1 <- rbind(X,zero1)
Z <- rbind(z,diag(M))
XX <- cbind(X1,Z)

itmax <- 1000;
alfa0 <- c(beta1,va1)
beta <- beta1 ; va <- va1
flag <- 0
for(iter in 1:itmax)
{
theta <- as.vector(X%*%beta+z%*%va)
lamda <- exp(theta)
w1 <- mzk*lamda
w <- c(w1,rep(1/sig2,M))
mu <- lamda

w.sq <- w^0.5
zy <- c((theta+(y-mu)/mu),rep(0,M))*w.sq
zx <- XX*w.sq
tfit <- lm.fit(zx,zy) # Dave: change to lm.fit
  
Alfa <- coef(tfit)
beta <- Alfa[1:p1]
va <- Alfa[(p1+1):(p1+M)]
if(max(abs(Alfa-alfa0))<epsilon) { flag <- 1;break}
alfa0 <- Alfa;
#cat(beta, iter,'\n')
}
if(flag)  result <- list(beta = beta, va = va)
else stop("error: not reach the convergence")
}

# ---------------------------------------

#  GLMM of Logistic regression

# ---------------------------------------
wreml.logit <- function(y,x,z,alfa1,yu1,sig1,famaly="logistic",epsilon=1e-3)
{
M <- ncol(z);n <- length(y)
X <- cbind(1,x)
p1 <- ncol(X)
zero1 <- matrix(0,ncol=p1,nrow=M)	
X1 <- rbind(X,zero1)
Z <- rbind(z,diag(M))
XX <- cbind(X1,Z)

itmax <- 1000
alfa0 <- c(alfa1,yu1)
alfa <- alfa1
yu <- yu1;flag <- 0
for(iter in 1:itmax)
{
theta <- as.vector(X%*%alfa+z%*%yu)
w1 <- exp(theta)/(1+exp(theta))^2
w <- c(w1,rep(1/sig1,M))
mu <- exp(theta)/(1+exp(theta))

w.sq <- w^0.5
zy <- c((theta+(y-mu)/w1),rep(0,M))*w.sq
zx <- XX*w.sq
tfit <- lm.fit(zx,zy) # Dave: change to lm.fit
  
Alfa <- coef(tfit)
alfa <- Alfa[1:p1]
yu <- Alfa[(p1+1):(p1+M)]

if(max(abs(Alfa-alfa0))<epsilon) { flag <- 1;break}
alfa0 <- Alfa
}
if(flag) reslt <- list(alfa=alfa,yu=yu)
else stop("error:not reach the convergence")
}

hznb <- function(y,X,G,Z,pai,mu,th,sig=NULL,sigu=NULL)
{

yzero <- ifelse(y>0,0,1)
ksi <- pai/(1-pai)
u <- th/(th+mu)
ep1 <- u^th
ep2 <- (ksi+ep1)^2

# information matrix

w11<--(yzero*ksi*ep1/ep2-ksi/(1+ksi)^2)
w12<--(yzero*th*ksi*ep1*(1-u)/ep2)

w22 <- (-yzero*th*ksi*(ksi+ep1+(1-u)*th*ep1/u)/ep2+th+(1-yzero)*y)*u*(1-u)

# second derivtive of alpha
#B<-log(u)+(1-u)
#B1<-(1-u)^2/th
#w23<-(1-u-(1-yzero)*y*u/th+yzero*ksi*(u/(ksi+ep1)/th+ep1*B/ep2))*(1-u)
#-------------------------------------
pa <- ncol(G)
pb <- ncol(X)
p <- ncol(Z)

ww11 <- t(matrix(rep(w11,pa),ncol=pa))
ww12 <- t(matrix(rep(w12,pa),ncol=pa))
ww22 <- t(matrix(rep(w22,pb),ncol=pb))
m11 <- (t(G)*ww11)
m12 <- (t(G)*ww12)
m22 <- (t(X)*ww22)

I11 <- m11%*%G
I12 <- m12%*%X

I22 <- m22%*%X

if(!is.null(sig))
{
z22 <- t(Z)*t(matrix(rep(w22,p),ncol=p))
I14 <- m12%*%Z
I24 <- m22%*%Z
I44 <- z22%*%Z+diag(1/sig,p)
}
if(!is.null(sigu))
{
z11 <- t(Z)*t(matrix(rep(w11,p),ncol=p))
z12 <- (matrix(rep(w12,p),ncol=p))*Z

I13 <- m11%*%Z
I23 <- t(X)%*%z12
I33 <- z11%*%Z+diag(1/sigu,p)
}

if(is.null(sigu)&is.null(sig))
{
	V1 <- cbind(I11,I12)
	V2 <- cbind(t(I12),I22)
	V <- rbind(V1,V2)
}
if((!is.null(sigu))&(!is.null(sig)))
{
	I34 <- t(Z)%*%z12

	V1 <- cbind(I11,I12,I13,I14)
	V2 <- cbind(t(I12),I22,I23,I24)
	V3 <- cbind(t(I13),t(I23),I33,I34)
	V4 <- cbind(t(I14),t(I24),t(I34),I44)
	V <- rbind(V1,V2,V3,V4)
	
	M2 <- diag(0,(pa+pb+p+p))
	M2[pa+pb+1:p,pa+pb+1:p] <- diag(1/sigu,p)
	M2[(pa+pb+p)+1:p,(pa+pb+p)+1:p] <- diag(1/sig,p)

	H <- V-M2
}

if(is.null(sigu)&(!is.null(sig)))
{	
	V1 <- cbind(I11,I12,I14)
	V2 <- cbind(t(I12),I22,I24)
	V4 <- cbind(t(I14),t(I24),I44)
	V <- rbind(V1,V2,V4)
}
if((!is.null(sigu))&(is.null(sig)))
{	
	V1 <- cbind(I11,I12,I13)
	V2 <- cbind(t(I12),I22,I23)
	V3 <- cbind(t(I13),t(I23),I33)

	V <- rbind(V1,V2,V3)
}

	IV <- solve(V)
if((!is.null(sigu))&(!is.null(sig)))
{
	df <- length(y)-sum(diag(IV%*%H))
   list(dd=diag(IV),df=df) 	
}

else dd <- diag(IV)

				
}

################### Main Estimation Function ###################################

zinbmix <- function(y, x.p=NULL, rv=NULL, random, x.l=NULL, model)
{	
itmax <- 1000
n <- length(y)
yz <- ifelse(y > 0, 0, 1)
ct0 <-  list(epsilon = 0.001, maxit = 50, trace = F)
if(!is.null(x.l))
{
x.l <- as.matrix(x.l)	
G <- cbind(1,x.l)
alfa <- coef(glm(yz ~ x.l, family = binomial(link = logit), 
                  na.action = na.omit, control = ct0))
}
else
{       		 
alfa <- coef(glm(yz ~ 1, family = binomial(link = logit), 
                  na.action = na.omit, control = ct0))
G <- as.matrix(rep(1,n))
}
if(!is.null(x.p))
{
x.p <- as.matrix(x.p)	
X <- cbind(1,x.p)
beta <- coef(glm(y ~ x.p, family = poisson(link =log), 
                  na.action = na.omit, control = ct0))
}
else
{       	 
beta <- coef(glm(y ~ 1, family = poisson(link = log), 
                  na.action = na.omit, control = ct0))
X <- as.matrix(rep(1,n))
}
	
pa <- ncol(G)
pb <- ncol(X)
m <- ncol(rv)

#initial value
ZK1 <- ifelse(y > 0, 1, 0)
th <- 1

yu <- rep(0., m)
va <- rep(0, m)
sigu <- 1.2
sig2 <- 0.1
names(sig2) <- "RandomEffect"
flag <- 0
# beginning of outer loop
for( ie in 1:itmax)
{
for (iter in 1:itmax)
{
 if(is.null(x.l))
	theta <- as.vector(exp(G*alfa))
 else	
	{
		if(model == "rnb" | model == "zinb" )
		theta <- as.vector(exp(G %*% alfa))
		else theta <- as.vector(exp(G %*% alfa+rv%*%yu))
	}	
	
if(is.null(x.p))
	mu <- as.vector(exp(X*beta))
else	
	{
		if(model == "rlg" | model == "zinb" )
		mu <- as.vector(exp(X %*% beta))
		else mu <- as.vector(exp(X%*%beta+rv%*%va))
	}	
	k <- agetk.ml(y,mu,(1-ZK1))
	th <- 1/k
	#  E-step
	ZK <- ifelse(y > 0, 0, 1/(1+1/theta*(th/(mu+th))^th))
	#  M-step
	wmm <- 1/(1+k*mu)*(1-ZK) #weight

   if(!is.null(x.l))
	{
	if((model != "rnb")&(model != "zinb"))
	{
      lgt <- wreml.logit(ZK,x.l,rv,alfa,yu,sigu)
		alfa <- lgt$alfa
		yu <- lgt$yu
	}	
	else
			alfa <- coef(glm(ZK ~ x.l, family = binomial(link = logit), 
                  na.action = na.omit, control = ct0))
	}
	else alfa <- coef(glm(ZK ~ 1, family = binomial(link = logit), 
                  na.action = na.omit, control = ct0))

   if(!is.null(x.p))
	{
		if(model != "rlg" & model != "zinb")
	{
		glm.poi <- wreml.poi(y,wmm,x.p,rv,beta,va,sig2)
		beta <- glm.poi$beta
		va <- glm.poi$va
	}
		else
		beta <- coef(glm(y ~ x.p, family = poisson(link =log),
					       weights = wmm, na.action = na.omit, control = ct0))
	}
	else 
	beta <- coef(glm(y ~ 1, family = poisson(link =log), 
					       weights = wmm, na.action = na.omit, control = ct0))
	
	if(max(abs(ZK-ZK1))<1e-3) {flag <- 1;break;}
   	ZK1 <- ZK	
cat('\n',iter,k,alfa,'\n')
} # end of inner loop

if(is.null(x.l)) pai <- mean(ZK1)
else pai  <-  theta/(1 + theta)
	
	if(model=="zinbmix")
	{  
		hz <- hznb(y,X,G,rv,pai,mu,th,sig2,sigu)
		vd <- hz$dd
		nsigu  <- as.numeric(t(yu)%*%yu+sum(vd[pa+pb+1:m]))/m
		nsigv2  <- as.numeric(t(va)%*%va+sum(vd[pa+pb+m+1:m]))/m
		if(nsigv2<=0) nsigv2 <- 1
		if(nsigu<=0) nsigu <- 1
	#	cat("model=zinbmix")	
	}
	if(model=="rnb")
	{	vd <- hznb(y,X,G,rv,pai,mu,th,sig2)
		nsigv2  <- as.numeric(t(va)%*%va+sum(vd[(pa+pb+1):(pa+pb+m)]))/m
		if(nsigv2<=0)
		nsigv2 <- 1;
		sigu <- nsigu <- 1;
		cat("model=rnb")
    }  
	if(model=="rlg")         
	{	vd <- hznb(y,X,G,rv,pai,mu,th,,sigu)
		nsigu  <- as.numeric(t(yu)%*%yu+sum(vd[(pa+pb+1):(pa+pb+m)]))/m
		if(nsigu<=0) nsigu <- 1;
		sig2 <- nsigv2 <- 1;
		cat("model=rlg")
	    } 
	if(model=="zinb") 
	{
	vd <- hznb(y,X,G,rv,pai,mu,th)	
	flag <- 1;cat("model=zinb")
	break
	}          
if((abs(nsigv2-sig2)<1e-4)&(abs(nsigu-sigu)<1e-4)) {flag <- 1;break}
	sig2 <- nsigv2	
	sigu <- nsigu
cat('\n',ie,sig2,sigu,th,'\n')
} #end of loop

names(beta) <- c("Intercept", dimnames(x.p)[[2]])  # NOTE: problem here?
names(alfa) <- c("Intercept", dimnames(x.l)[[2]])
std <- sqrt(vd)

if(model=="zinbmix")
{
	risku <- as.vector(yu)#/std[pa+pb+1:m])
	riskv <- as.vector(va)#/std[pa+pb+m+1:m])
	names(riskv) <- names(risku) <- names(table(random))
}
if(model=="rnb")
{
riskv <- as.vector(va/std[pa+pb+1:m])
names(riskv) <- names(table(random))
#cat("OK2")
}
if(model=="rlg")
{
	risku <- as.vector(yu/std[pa+pb+1:m])
	names(risku) <- names(table(random))
}

stda <- std[1:pa]
stdb <- std[pa+1:pb]

eta <- cbind(alfa,stda,alfa/stda,2*(1-pnorm(abs(alfa/stda))))
dimnames(eta) <- list(names(alfa),c("Estimate","SD","t-value","p-value"))
etb <- cbind(beta,stdb,beta/stdb,2*(1-pnorm(abs(beta/stdb))))
dimnames(etb) <- list(names(beta),c("Estimate","SD","t-value","p-value"))
eta <- round(eta,3)
etb <- round(etb,3)

obj.call <- match.call()
if(model=="zinbmix")
{
	result <- list(call=obj.call, th=th, pai=pai, mu=mu, beta=etb, alfa=eta)
	result$sigv <- round(sig2,4)
	result$riskv <- round(riskv,4)
	result$sigu <- round(sigu,4)
	result$risku <- round(risku,4)
	result$df <- hz$df
	#cat("OKmix")

}
if(model=="rnb")
{
result <- list(call=obj.call, th=th, beta=etb, pai=pai, mu=mu, alfa=eta, 
                sigv=sig2, riskv=round(riskv,4))
cat("OKrnb")
}
if(model=="rlg")
{	result <- list(call=obj.call, th=th, pai=pai, mu=mu, beta=etb, alfa=eta)
	result$sigu <- round(sigu,4)
	result$risku <- round(risku,4)
	cat("OKrlg")

}
if(model=="zinb")
{result <- list(call=obj.call, th=th, beta=etb, pai=pai, mu=mu, alfa=eta)
cat("OKzinb")
}
if(flag) result
else stop("error:not reach the convergence")
}

#-----------------
# main function
#-----------------
# function sumzinbmix()
# y------count
# x.nb-----covariate matrix for mean
# x.l ----covariate matrix for logistic part
# random, r.nb,r.l are flags for whether including random effects in the models.

sumzinbmix <- function(y, x.nb=NULL, x.l=NULL, random=NULL, r.nb=F, r.l=F)
{
	if(!is.null(random))
	{Group <- c(as.factor(random)) # DAVE: codes --> c
		n <- length(y)
		m <- max(Group)
		z <- matrix(0, ncol = m, nrow = n)
		for(i in 1:m) 
		z[, i] <- ifelse(Group == i, 1,0)
	}
	else z <- diag(10)
if(r.nb&r.l)
	obj <- zinbmix(y, x.nb, z, random, x.l, model="zinbmix")	# random effects in both parts
if(r.nb&(!r.l))
	obj <- zinbmix(y, x.nb, z, random, x.l, model="rnb")      # random in nb part	
if((!r.nb)&r.l)
	obj <- zinbmix(y, x.nb, z, random, x.l, model="rlg")      # random in logistic part	
if((!r.nb)&(!r.l))
	obj <- zinbmix(y, x.nb, z, random, x.l, model="zinb")     # no random 	

#read the object
	pai <- obj$pai
	th <- obj$th
	beta <- obj$beta
	alfa <- obj$alfa #parameters in logistic part
	mu <- obj$mu #mean of y
	
#*****************************************************************

#*****************************************************************
#--------------calculate the sumarry results  -------------------	 #frequency

k <- max(y) ; n <- length(y)
fr.z <- fr.ob <- 0:k
fr.z[1] <- sum(pai+(1-pai)*(th/(th+mu))^th)
for(jj in 1:k){
fr.z[jj+1] <- sum((gamma(jj+th)/gamma(jj+1)/gamma(th)*(th/(mu+th))^th*(mu/(mu+th))^jj)*(1-pai))
}
for(jj in 0:k){fr.ob[jj+1] <- sum(ifelse(y==jj,1,0))}
fr.ob <- round(fr.ob[1:(k+1)],3);
fr.z <- round(fr.z[1:(k+1)],3)
devia <<- cbind(0:k,(fr.z-fr.ob)/n)

fr.with <- matrix(c((0:k),fr.ob,fr.z),ncol=3)
dimnames(fr.with) <- list(NULL,c("count","observed","expected"))


fr.ob <- c(fr.ob[fr.z>5],n-sum(fr.ob[fr.z>5]))
fr.z <- c(fr.z[fr.z>5],n-sum(fr.z[fr.z>5]))
chi.z <- sum((fr.ob-fr.z)^2/fr.z)
pv <- 1-pchisq(chi.z,length(fr.z)-4)

#std error of alpha

ksi <- pai/(1-pai)
u <- th/(th+mu)
yzero <- ifelse(y > 0, 0, 1)
#____________________________________
f0 <- table(y[y > 0])
f <- rep(0, k)
f[as.numeric(names(f0))] <- f0
tot <- sum(f0)
f <- tot + f - cumsum(f)
#------------------------------------	

# first deriv of A(th)
i <- sum(f/(th+1:k-1))
ii <- sum(f/(th + 1:k-1)^2)

B <- log(u)+(1-u)
B1 <- (1-u)^2/th
ep1 <- u^th
ep2 <- (ksi+ep1)^2

w33 <- sum(yzero*ksi*(B1*(ksi+ep1)-B^2*ep1)/ep2-B1-(1-yzero)*y*(u/th)^2)+ii
sdc <- sqrt(1/w33)/th^2
etc <- cbind(1/th,sdc,1/sdc/th,2*(1-pnorm(abs(1/sdc/th))))
etc <- round(etc,4)
dimnames(etc) <- list("alpha",c("Estimate","SD","t-value","p-value"))

# log-likehood

loglik <- (sum(yzero*log(ksi+ep1)-log(1+ksi)+(1-yzero)*(lgamma(y+th)-lgamma(y+1)-lgamma(th)+log(ep1)+y*log(1-u))))

#pearson residuals

mu.y <- (1-pai)*mu
std.y <- sqrt(mu.y*(1+(1/th+pai)*mu))
r.p <- (y-mu.y)/std.y 
PearChi <- round(sum(r.p^2),3)   #Pearson residuals
r.p <- round(r.p,4)   #residuals

#Deviance residual

lnb <- (lgamma(y+th)-lgamma(y+1)-lgamma(th)+th*log(th/(y+th))+log((y/(th+y))^y))

lzinb <- (yzero*log(ksi+ep1)-log(1+ksi)+(1-yzero)*(lgamma(y+th)-lgamma(y+1)-lgamma(th)+log(ep1)+y*log(1-u)))

d <- 2*(lnb-lzinb)
Dev <- round(sum(d),3)# Deviance;
r.d <- round(sign(y-mu.y)*sqrt(d),4) #Deviance residuals

residm <<- cbind(y, mu.y, r.p, r.d, lnb, lzinb)    # output to a data sheet

#-----------------------------------------------------
#******************************************************
#******************************************************
# setTextOutputRouting("Report","Default") # DAVE: escape text routing
cat("\n","Fit of ZINB mixed model",'\n')	
cat('\n',"inflated part",'\n')
print(obj$alfa)
cat('\n','_____________________________________','\n\n')

cat('\n\n',"negative binomial part",'\n')
print(obj$beta)
cat('\n','_____________________________________','\n\n')
print(etc)

if(r.l)
{
cat('\n',"sigma^2 of random effect in inflate :", round(obj$sigu,5),'\n')
cat('\n',"random effects:\n")
risku <<- as.matrix(obj$risku)
print(obj$risku)
cat('\n','_____________________________________','\n\n')

}
if(r.nb)
{
cat('\n',"sigma^2 of random effect in NB :", round(obj$sigv,5),'\n')
cat('\n',"random effects:\n")
riskv <<- as.matrix(obj$riskv)   #
print(obj$riskv)
cat('\n','_____________________________________','\n\n')
}

cat('\n',"--------------------------------------",'\n\n')
obj$count.tab <- fr.with      # Dave: add table of counts to output object
print(fr.with)
cat('\n','_____________________________________','\n\n')
cat('\n\n',"Chisquare test statistics: ",round(chi.z,3))
cat('\n',"loglikelihood              : ",round(loglik,3))
cat('\n',"Pearson residuals          : ",round(PearChi,3))
obj$chi.sq <- chi.z           # Dave: save indices to model object
obj$loglik <- loglik
# cat('\n',"Deviance                   : ",round(Dev, 3))
# cat('\n', round(obj$df,3)) # Dave: error msg ?

# setTextOutputRouting("Default","Default") # Dave: escape text-outputting
# cat("\n")
# return("End of program.  See results in the report window !") # Dave: escape
#
### Dave: output whole model object
return(obj)
}

# example
# sumzinbmix(y,x.nb=NULL,,x.l=NULL,random=NULL,r.nb=F,r.l=F)

######################### Coefficient Extraction Function ######################

zinb.coef <- function(obj, dig=2){
  lr.coef <- data.frame(obj$alfa)
  nb.coef <- data.frame(obj$beta)
  lr.coef$OR <- exp(lr.coef[,"Estimate"])
  lr.coef$Lower <- exp(lr.coef[,"Estimate"] - 1.96*lr.coef[,"SD"])
  lr.coef$Upper <- exp(lr.coef[,"Estimate"] + 1.96*lr.coef[,"SD"])
  nb.coef$exp.b <- exp(nb.coef[,"Estimate"])
  nb.coef$Lower <- exp(nb.coef[,"Estimate"] - 1.96*nb.coef[,"SD"])
  nb.coef$Upper <- exp(nb.coef[,"Estimate"] + 1.96*nb.coef[,"SD"])
  cat("\n", "Logistic Model", "\n")
  print(round(lr.coef, dig))
  cat("\n", "Negative Binomial Model", "\n")
  print(round(nb.coef, dig))
  }