library(rjags)

# Load the variant table and keep only the rows that carry one of the five
# recognised InSiGHT classes together with a non-missing CADD phred score.
data <- read.table("toDF_27feb.tsv", header = TRUE)
classes <- paste("Class", 1:5)
keep <- data$InSiGHT_Class %in% classes & !is.na(data$CADD_phred)
data <- data[keep, ]

# Use `levels` rather than `labels`: the integer code of each class then always
# equals its class number, and the call no longer fails with a length mismatch
# when one of the five classes happens to be absent after filtering.
data$InSiGHT_Class <- factor(as.character(data$InSiGHT_Class), levels = classes)

# Data handed to the JAGS model: class factor, CADD score and number of classes.
data.jags <- list(
  insight = data$InSiGHT_Class,
  cadd = data$CADD_phred,
  ncat = length(classes)
)

# Fit the model described in "jags.txt" with three chains.
# (A previous throw-away jags.model() call whose result was discarded has been
# dropped: it compiled and adapted a second model for no benefit.)
#
# Each chain gets different starting values for `dz`; the first element is NA,
# i.e. no user-supplied initial value is given for it.
chain.inits <- list(
  list(dz = c(NA, 1, 1, 1)),
  list(dz = c(NA, 5, 5, 5)),
  list(dz = c(NA, 1, 10, 1))
)
model <- jags.model(
  "jags.txt",
  data = data.jags,
  inits = chain.inits,
  n.chains = 3,
  n.adapt = 500
)

# Posterior draws of the monitored node `z`, then deviance samples for the DIC.
samples <- coda.samples(model, variable.names = c("z"), n.iter = 500)
dic <- dic.samples(model, n.iter = 1000)

# Flag the observations whose deviance exceeds the 95th percentile.
# NOTE(review): this assumes dic$deviance holds one value per row of `data`,
# in the same order - confirm against the model file.
obs.deviance <- as.numeric(dic$deviance)
outliers <- quantile(obs.deviance, probs = 0.95)

largedev <- which(obs.deviance > outliers)

# Show class, CADD score and deviance of the flagged observations.
cbind(data[largedev, c("InSiGHT_Class", "CADD_phred")], dic$deviance[largedev])


# Inverse logit (logistic function): maps a real number to the interval [0, 1].
#
# x: numeric vector (or array); the result has the same shape.
#
# Implemented via stats::plogis, which stays finite for large |x|. The naive
# form exp(x) / (exp(x) + 1) evaluates to Inf / Inf = NaN once exp(x) overflows.
ilogit <- function(x) {
  plogis(x)
}

# Posterior medians of the monitored `z` nodes, used as cut-points below.
z <- summary(samples)$quantiles[, "50%"]

# Grid of CADD scores on which the curves are evaluated.
x <- seq(0, 50, by = 0.1)

# y[, i] = ilogit(z[i] - x): one column per cut-point.
y <- vapply(seq_along(z), function(i) ilogit(z[[i]] - x), numeric(length(x)))

# Differences of adjacent columns of (0, y, 1) give one column per class.
cum <- cbind(0, y, 1)
q <- cum[, -1, drop = FALSE] - cum[, -ncol(cum), drop = FALSE]

# Draw all class curves at once. The y-axis is fixed to [0, 1]; deriving it
# from the first curve only (as plot(x, q[, 1]) does) can clip the others.
class.idx <- seq_len(ncol(q))
matplot(x, q, type = "l", lty = class.idx, col = "black", ylim = c(0, 1),
        xlab = "CADD score", ylab = "p(x|x~C)")

legend("right", legend = paste("Class", class.idx), lty = class.idx)

#apply(summary(samples)$quantiles, 1, function(row) {
# lines(x=row[c(1,5)], y=c(0.5, 0.5))
#})

# Class probabilities for a single CADD score.
#
# cadd:      a single numeric CADD score.
# cutpoints: numeric vector of cut-points; defaults to the script-level `z`,
#            so existing calls post.prob(cadd) behave as before.
#
# Returns a vector with one entry more than `cutpoints`: the successive
# differences of c(0, ilogit(cutpoints - cadd), 1). Using diff() avoids the
# hard-coded indices 2:6 / 1:5, which were only right for four cut-points.
post.prob <- function(cadd, cutpoints = z) {
  cumulative <- c(0, ilogit(cutpoints - cadd), 1)
  diff(cumulative)
}


# Rows flagged as high-deviance, with their deviance attached as a column.
data.missclassified <- cbind(data[largedev, ], deviance = dic$deviance[largedev])

# One row of class probabilities per flagged observation. vapply() fixes the
# result shape (five values per score) instead of relying on sapply()'s
# input-dependent simplification.
post <- t(vapply(data.missclassified$CADD_phred, post.prob, numeric(5)))
colnames(post) <- paste("Class", 1:5)

# Append the index of the most probable class for each row.
classified <- cbind(post, "More likely" = apply(post, 1, which.max))

# Write the flagged rows, their class probabilities and the original InSiGHT
# class to a tab-separated file.
write.table(
  cbind(
    data.missclassified,
    classified,
    InSiGHT = data.missclassified$InSiGHT_Class
  ),
  "regressed.missclassiefied.tsv",
  row.names = FALSE,
  sep = "\t"
)
