library(MCMCpack)
library(msm)
library(coda)
##--------------------------------------ROC curve-------------------
source("~/Research/common/myROC.r")

##-------- Gibbs sampler ------------------
Gibbs.mcmc <- function( iteNum=100, burnin=10, printI=T, simreal="sim",runnum=1){
  ## ---- save MCMC samples
  saveNum <- iteNum-burnin
  s.mcmc <- matrix(factor(0, levels=c(-1,0,1)), nrow=saveNum, ncol=J)   ## s_j \in {1,2,3} =={ n_j0, n_j+, n_j1}
  mu.mcmc <- matrix(0, nrow=saveNum, ncol=J)
  sigmasq.mcmc <- mu.mcmc
  tausq.mcmc <- matrix(0,nrow=saveNum, ncol=3, dimnames=list(NULL, c("tau-","tau0","tau+")))
  lambda.mcmc <- matrix(0, nrow=saveNum, ncol=3, dimnames=list(NULL, c("lambda-","lambda0","lambda+")))
  betaminus.mcmc <- beta0.mcmc <- betaplus.mcmc <- matrix(0, nrow=saveNum, ncol=G)
  alpha.mcmc <- matrix(0,nrow=saveNum, ncol=3, dimnames=list(NULL, c("alpha-","alpha0","alpha+")))

  ##------- set initial value of parameters
  s <- factor(sample( -1:1, J, replace=T), levels=c(-1,0,1))
  mu <- rep(0,J)
  sigmasq <- rep(1,J)
  tausq <- rep(1,3)
  ## lambda[2]==0
  lambda <- c(-1,0,1)
  alpha <- c(0.33,0.33,0.34)
  beta <- matrix(0.5, nrow=G, ncol=3)

  ##--------- set hyper parameters
  L <- max(x)
  u <- 0.01
  v <- 0.01
  alphahat <- table(DE)/length(DE)
  ng <- apply(y, 1, sum)
  a <- t(sapply(ng, function(ngg) ngg*alphahat))
  b <- t(sapply(J-ng, function(ngg) ngg*alphahat))
  ## ---- Gibbs sampler-------------
  Gibbs.mu <- function(j) {
    sjnum <- as.numeric(s[j])
    vr <- 1 / (Ii/sigmasq[j] + 1/tausq[ sjnum ])
    avgup <- sum(x[,j])/sigmasq[j] + lambda[ sjnum ]/tausq[ sjnum ]
    mu[j] <<- rnorm( 1, avgup*vr, sqrt(vr))
  }
  ##Gibbs.mu(1) 
  Gibbs.sigmasq <- function(j) {
    sigmasq[j] <<- rinvgamma(1, Ii/2+u, sum((x[,j]-mu[j])^2)/2+v)
  }
  ##Gibbs.sigmasq(1)
  Gibbs.tausq <- function() {
    ts0 <- table(s)
    ts0[ts0==0] <- 1
    tausq <<- rinvgamma(3, u+ ts0 /2,   v + c(sum( (mu[s==-1]-lambda[1])^2), sum( (mu[s==0])^2),sum( (mu[s==1]-lambda[3])^2))/2)
  }
  ##Gibbs.tausq()
  Gibbs.lambda <- function() {##lambda[2]==0
    mn <- mean(mu[s== -1])
    lambda[1] <<- rtnorm(1, mean=ifelse(is.na(mn),0,mn), sd=sqrt(tausq[1]/(0.5+sum(s==-1))), lower=-L, upper=0)
    mn <- mean(mu[s== 1])
    lambda[3] <<- rtnorm(1, mean=ifelse(is.na(mn),0,mn), sd=sqrt(tausq[3]/(0.5+sum(s== 1))), lower=0, upper=L)
  }
  Gibbs.s <- function(j) {
    qvec <- rep(0,3)
    for (i in 1:3) {
      qvec[i] <- dnorm(mu[j], mean=lambda[i], sd=sqrt( tausq[i] )) * alpha[i] * prod( dbinom(y[,j], 1, beta[,i]) )
    }
    s[j] <<- which(rmultinom(1,1,qvec/sum(qvec))==1)-2
  }
  Gibbs.beta <- function(g) {
    yy <-  tapply(y[g,], s, sum )
    yy[is.na(yy)] <- 0.01
    beta[g,] <<- rbeta(3, yy+a[g,], table(s)-yy+b[g,])
  }
  Gibbs.alpha <- function() {
     alpha <<- rdirichlet(1, table(s)+1 )

  }
  
  ##--- start Gibbs sampler here
  ##randord <- rep(0,J)
  ## expression of running one step of MCMC
  runOneMcmc <- expression( 
      {
        ##randord[1:J] <- sample(J)
        for (j in 1:J) Gibbs.mu(j)
        ##randord[1:J] <- sample(J)
        for (j in 1:J) Gibbs.sigmasq(j)
        Gibbs.tausq()
        Gibbs.lambda()
        ##randord[1:J] <- sample(J)
        for (j in 1:J)  Gibbs.s(j)
        
        ##randord[1:J] <- sample(G)
        for (g in 1:G)  Gibbs.beta(g)
        Gibbs.alpha()
      })

  ##for burnin
  for (ite in 1:burnin) {
    if(printI && ite %% 50==0) {print(ite)}
    eval(runOneMcmc)
  }
  ##after burnin
  for (ite in 1:saveNum) {
    if(printI && ite %% 50==0) {print(ite)}
    eval(runOneMcmc)
    s.mcmc[ite,] <- s
    mu.mcmc[ite,] <- mu
    sigmasq.mcmc[ite,] <- sigmasq
    tausq.mcmc[ite,] <- tausq
    lambda.mcmc[ite,] <- lambda
    betaminus.mcmc[ite,] <- beta[,1]
    beta0.mcmc[ite,] <- beta[,2]
    betaplus.mcmc[ite,] <- beta[,3]
    alpha.mcmc[ite,] <- alpha
  }
  save(s.mcmc, mu.mcmc, sigmasq.mcmc, tausq.mcmc, lambda.mcmc, betaminus.mcmc, beta0.mcmc, betaplus.mcmc, alpha.mcmc, file=paste(simreal,runnum,".rdata", sep=""))
}

##-------- Gibbs sampler--Bayesian alone ------------------
ba.Gibbs.mcmc <- function( iteNum=100,burnin=10, printI=T, simreal="sim", runnum=1){
  ## ---- save MCMC samples
  saveNum <- iteNum-burnin
  s.mcmc <- matrix(factor(0, levels=c(-1,0,1)), nrow=saveNum, ncol=J)   ## s_j \in {1,2,3} =={ n_j0, n_j+, n_j1}
  mu.mcmc <- matrix(0, nrow=saveNum, ncol=J)
  sigmasq.mcmc <- mu.mcmc
  tausq.mcmc <- matrix(0,nrow=saveNum, ncol=3, dimnames=list(NULL, c("tau-","tau0","tau+")))
  lambda.mcmc <- matrix(0, nrow=saveNum, ncol=3, dimnames=list(NULL, c("lambda-","lambda0","lambda+")))
  ##betaminus.mcmc <- beta0.mcmc <- betaplus.mcmc <- matrix(0, nrow=saveNum, ncol=G)
  alpha.mcmc <- matrix(0,nrow=saveNum, ncol=3, dimnames=list(NULL, c("alpha-","alpha0","alpha+")))

  ##------- set initial value of parameters
  s <- factor(DE, levels=c(-1,0,1)) ##sample( -1:1, J, replace=T)
  mu <-  rep(0,J) ##uu
  sigmasq <- rep(0.75^2,J)
  tausq <- rep(1,3)
  ## lambda[2]==0
  lambda <- c(-0.5,0,0.5)
  alpha <- c(0.2, 0.6, 0.2)
  ##beta <- matrix(0.5, nrow=G, ncol=3)

  ##--------- set hyper parameters
  L <- max(x)
  u <- 0.01
  v <- 0.01
  alphahat <- table(DE)/length(DE)
  ##ng <- apply(y, 1, sum)
  ##a <- t(sapply(ng, function(ngg) ngg*alphahat))
  ##b <- t(sapply(J-ng, function(ngg) ngg*alphahat))
  ## ---- Gibbs sampler-------------
  Gibbs.mu <- function(j) {
    sjnum <- as.numeric(s[j])
    vr <- 1 / (Ii/sigmasq[j] + 1/tausq[ sjnum ])
    avgup <- sum(x[,j])/sigmasq[j] + lambda[ sjnum ]/tausq[ sjnum ]
    mu[j] <<- rnorm( 1, avgup*vr, sqrt(vr))
  }
  Gibbs.sigmasq <- function(j) {
    sigmasq[j] <<- rinvgamma(1, Ii/2+u, sum((x[,j]-mu[j])^2)/2+v)
    ##if( sigmasq[j]>1000 ) browser()
  }
  Gibbs.tausq <- function() {
    ts0 <- table(s)
    ts0[ts0==0] <- 1
    tausq <<- rinvgamma(3, u+ ts0/2,   v + c(sum( (mu[s==-1]-lambda[1])^2), sum( (mu[s==0])^2),sum( (mu[s==1]-lambda[3])^2))/2)
  }
  Gibbs.lambda <- function() {##lambda[2]==0
    mn <- mean(mu[s== -1])
    lambda[1] <<- rtnorm(1, mean=ifelse(is.na(mn),0,mn), sd=sqrt(tausq[1]/(0.5+sum(s==-1))), lower=-L, upper=0)
    mn <- mean(mu[s== 1])
    lambda[3] <<- rtnorm(1, mean=ifelse(is.na(mn),0,mn), sd=sqrt(tausq[3]/(0.5+sum(s== 1))), lower=0, upper=L)
  }
  Gibbs.s <- function(j) {
    qvec <- rep(0,3)
    for (i in 1:3) {
      qvec[i] <- dnorm(mu[j], mean=lambda[i], sd=sqrt( tausq[i] )) * alpha[i] ## * prod( dbinom(y[,j], 1, beta[,i]) )
    }
    s[j] <<- which(rmultinom(1,1,qvec/sum(qvec))==1)-2
  }
  ##Gibbs.beta <- function(g) {
  ##  yy <-  tapply(y[g,], s, sum )
  ##  yy[is.na(yy)] <- 0.01
  ##  beta[g,] <<- rbeta(3, yy+a[g,], table(s)-yy+b[g,])
  ##}
  Gibbs.alpha <- function() {
     alpha <<- rdirichlet(1, table(s)+1 )
  }
  
  ##--- start Gibbs sampler here
  ##randord <- rep(0,J)
  ## expression of running one step of MCMC  
  runOneMcmc <- expression( 
      {
        ##randord[1:J] <- sample(J)
        for (j in 1:J) Gibbs.mu(j)
        ##randord[1:J] <- sample(J)
        for (j in 1:J) Gibbs.sigmasq(j)
        Gibbs.tausq()
        Gibbs.lambda()
        ##randord[1:J] <- sample(J)
        for (j in 1:J)  Gibbs.s(j)
        ##randord[1:J] <- sample(G)
        ##for (g in randord)  Gibbs.beta(g)
        Gibbs.alpha()
      })

  ##for burnin
  for (ite in 1:burnin) {
    if(printI && ite %% 50==0 ) {print(ite)}
    eval(runOneMcmc)
  }
  ##after burnin
  for (ite in 1:saveNum) {
    if(printI && ite %% 50==0 ) {print(ite)}
    eval(runOneMcmc)
    s.mcmc[ite,] <- s
    mu.mcmc[ite,] <- mu
    sigmasq.mcmc[ite,] <- sigmasq
    tausq.mcmc[ite,] <- tausq
    lambda.mcmc[ite,] <- lambda
    ##betaminus.mcmc[ite,] <- beta[,1]
    ##beta0.mcmc[ite,] <- beta[,2]
    ##betaplus.mcmc[ite,] <- beta[,3]
    alpha.mcmc[ite,] <- alpha
  }
  save(s.mcmc, mu.mcmc, sigmasq.mcmc, tausq.mcmc, lambda.mcmc, alpha.mcmc, file=paste(simreal,runnum,"_ba.rdata",sep=""))
}

################GSEA############
GSEA.EnrichmentScore2 <- function(gene.list, gene.set, weighted.score.type = 1, correl.vector = NULL) {
#
# Computes the weighted GSEA score of gene.set in gene.list. It is the same calculation as in 
# GSEA.EnrichmentScore but faster (x8) without producing the RES, arg.RES and tag.indicator outputs.
# This call is intended to be used to asses the enrichment of random permutations rather than the 
# observed one.
# The weighted score type is the exponent of the correlation 
# weight: 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted). When the score type is 1 or 2 it is 
# necessary to input the correlation vector with the values in the same order as in the gene list.
#
# Inputs:
#   gene.list: The ordered gene list (e.g. integers indicating the original position in the input dataset)  
#   gene.set: A gene set (e.g. integers indicating the location of those genes in the input dataset) 
#   weighted.score.type: Type of score: weight: 0 (unweighted = Kolmogorov-Smirnov), 1 (weighted), and 2 (over-weighted)  
#  correl.vector: A vector with the coorelations (e.g. signal to noise scores) corresponding to the genes in the gene list 
#
# Outputs:
#   ES: Enrichment score (real number between -1 and +1) 
#
# The Broad Institute
# SOFTWARE COPYRIGHT NOTICE AGREEMENT
# This software and its documentation are copyright 2003 by the
# Broad Institute/Massachusetts Institute of Technology.
# All rights are reserved.
#
# This software is supplied without any warranty or guaranteed support
# whatsoever. Neither the Broad Institute nor MIT can be responsible for
# its use, misuse, or functionality.

   N <- length(gene.list)
   Nh <- length(gene.set)
   Nm <-  N - Nh

   loc.vector <- vector(length=N, mode="numeric")
   peak.res.vector <- vector(length=Nh, mode="numeric")
   valley.res.vector <- vector(length=Nh, mode="numeric")
   tag.correl.vector <- vector(length=Nh, mode="numeric")
   tag.diff.vector <- vector(length=Nh, mode="numeric")
   tag.loc.vector <- vector(length=Nh, mode="numeric")

   loc.vector[gene.list] <- seq(1, N)
   tag.loc.vector <- loc.vector[gene.set]

   tag.loc.vector <- sort(tag.loc.vector, decreasing = F)

   if (weighted.score.type == 0) {
      tag.correl.vector <- rep(1, Nh)
   } else if (weighted.score.type == 1) {
      tag.correl.vector <- correl.vector[tag.loc.vector]
      tag.correl.vector <- abs(tag.correl.vector)
   } else if (weighted.score.type == 2) {
      tag.correl.vector <- correl.vector[tag.loc.vector]*correl.vector[tag.loc.vector]
      tag.correl.vector <- abs(tag.correl.vector)
   } else {
      tag.correl.vector <- correl.vector[tag.loc.vector]**weighted.score.type
      tag.correl.vector <- abs(tag.correl.vector)
   }

   norm.tag <- 1.0/sum(tag.correl.vector)
   tag.correl.vector <- tag.correl.vector * norm.tag
   norm.no.tag <- 1.0/Nm
   tag.diff.vector[1] <- (tag.loc.vector[1] - 1)
   tag.diff.vector[2:Nh] <- tag.loc.vector[2:Nh] - tag.loc.vector[1:(Nh - 1)] - 1
   tag.diff.vector <- tag.diff.vector * norm.no.tag
   peak.res.vector <- cumsum(tag.correl.vector - tag.diff.vector)
   valley.res.vector <- peak.res.vector - tag.correl.vector
   max.ES <- max(peak.res.vector)
   min.ES <- min(valley.res.vector)
   ES <- signif(ifelse(max.ES > - min.ES, max.ES, min.ES), digits=5)

   return(list(ES = ES))
}
