/*
 * Decompiled with CFR 0.152.
 */
package ca.mcgill.mcb.pcingola.snpEffect.commandLine;

import ca.mcgill.mcb.pcingola.codons.CodonTables;
import ca.mcgill.mcb.pcingola.collections.AutoHashMap;
import ca.mcgill.mcb.pcingola.interval.Chromosome;
import ca.mcgill.mcb.pcingola.interval.Gene;
import ca.mcgill.mcb.pcingola.interval.Genome;
import ca.mcgill.mcb.pcingola.interval.Marker;
import ca.mcgill.mcb.pcingola.interval.Markers;
import ca.mcgill.mcb.pcingola.interval.NextProt;
import ca.mcgill.mcb.pcingola.interval.Transcript;
import ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff;
import ca.mcgill.mcb.pcingola.snpEffect.commandLine.TranscriptData;
import ca.mcgill.mcb.pcingola.stats.CountByType;
import ca.mcgill.mcb.pcingola.util.Gpr;
import ca.mcgill.mcb.pcingola.util.GprSeq;
import ca.mcgill.mcb.pcingola.util.Timer;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.zip.GZIPInputStream;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

public class SnpEffCmdBuildNextProt
extends SnpEff {
    /** Conservation threshold (fraction of a category's counted sequences) for the sequence conservation analysis. */
    public static final double HIGHLY_CONSERVED_AA_PERCENT = 0.99;
    /** Minimum number of counted sequences in a category for the sequence conservation analysis. */
    public static final int HIGHLY_CONSERVED_AA_COUNT = 30;
    /** Annotation categories that are skipped; copied into {@link #categoryBlackList} by the constructor. */
    public static final String[] CATAGORY_BLACK_LIST_STR = new String[]{"", "sequence variant", "sequence conflict", "mature protein", "mutagenesis site", "retained intron"};

    // XML element names looked up while walking the NextProt document.
    public static final String NODE_NAME_PROTEIN = "protein";
    public static final String NODE_NAME_GENE = "gene";
    public static final String NODE_NAME_TRANSCRIPT = "transcript";
    public static final String NODE_NAME_ANNOTATION = "annotation";
    public static final String NODE_NAME_ANNOTATION_LIST = "annotationList";
    public static final String NODE_NAME_POSITION = "position";
    public static final String NODE_NAME_PROPERTY = "property";
    public static final String NODE_NAME_DESCRIPTION = "description";
    public static final String NODE_NAME_CVNAME = "cvName";
    public static final String NODE_NAME_SEQUENCE = "sequence";
    public static final String NODE_NAME_XREF = "xref";

    // XML attribute names read from those elements.
    public static final String ATTR_NAME_UNIQUE_NAME = "uniqueName";
    public static final String ATTR_NAME_DATABASE = "database";
    public static final String ATTR_NAME_ACCESSION = "accession";
    public static final String ATTR_NAME_ANNOTATION_LIST = "annotationList";
    public static final String ATTR_NAME_CATAGORY = "category";
    public static final String ATTR_NAME_FIRST = "first";
    public static final String ATTR_NAME_LAST = "last";
    public static final String ATTR_NAME_ISOFORM_REF = "isoformRef";
    public static final String ATTR_NAME_PROPERTY_NAME = "propertyName";
    public static final String ATTR_NAME_VALUE = "value";

    // XML attribute values used as filters.
    public static final String ATTR_VALUE_ENSEMBL = "Ensembl";
    public static final String ATTR_VALUE_REFSEQ = "RefSeq";
    // NOTE(review): the value starts with a stray apostrophe; this constant is not referenced
    // anywhere in this class, so confirm against external users before changing it.
    public static final String ATTR_VALUE_NUCLEOTIDE_SEQUENCE_ID = "'nucleotide sequence ID";
    /** Default NextProt database directory under the user's home; not referenced elsewhere in this class. */
    public static final String NEXT_PROT_DB_DIR = Gpr.HOME + "/snpEff/db/nextProt/2012_06";

    /** Directory containing the NextProt XML files (second positional command line argument). */
    String xmlDirName;
    /** Set view of {@link #CATAGORY_BLACK_LIST_STR}; annotation lists in these categories are ignored. */
    HashSet<String> categoryBlackList;
    /** Transcript accession keyed by the 'uniqueName' of the transcript node's parent (filled by findTrIds). */
    HashMap<String, String> trIdByUniqueName;
    /** Sequence text keyed by the 'uniqueName' of the sequence node's parent (filled by findSequences). */
    HashMap<String, String> sequenceByUniqueName;
    /** Per annotation key (see key()), a count of each amino acid sequence observed. */
    AutoHashMap<String, CountByType> countAaSequenceByType;
    /** Transcripts of the loaded genome keyed by transcript ID (filled by run()). */
    HashMap<String, Transcript> trById;
    /** Transcript IDs whose protein sequence differs from the NextProt sequence. */
    HashSet<String> proteinDifferences = new HashSet();
    /** Transcript IDs whose protein sequence equals the NextProt sequence. */
    HashSet<String> proteinOk = new HashSet();
    /** NextProt markers created while parsing annotations; written out by save(). */
    Markers markers = new Markers();
    /** Genome obtained from the configuration in run(). */
    Genome genome;
    /** Amino acid error counter shown in the summary printed by run(). */
    int aaErrors;

    /**
     * Creates the command with empty lookup tables and a category black list
     * populated from {@link #CATAGORY_BLACK_LIST_STR}.
     */
    public SnpEffCmdBuildNextProt() {
        // Categories that must never produce NextProt markers
        this.categoryBlackList = new HashSet<String>();
        Collections.addAll(this.categoryBlackList, CATAGORY_BLACK_LIST_STR);

        // Lookup tables filled while parsing
        this.trById = new HashMap<String, Transcript>();
        this.trIdByUniqueName = new HashMap<String, String>();
        this.sequenceByUniqueName = new HashMap<String, String>();
        this.countAaSequenceByType = new AutoHashMap(new CountByType());
    }

    /**
     * Analyzes, for every annotation key collected in {@link #countAaSequenceByType},
     * how conserved the annotated amino acid sequences are.
     *
     * A key is flagged as "highly conserved" when a single sequence accounts for more than
     * {@link #HIGHLY_CONSERVED_AA_PERCENT} of all counted sequences and at least
     * {@link #HIGHLY_CONSERVED_AA_COUNT} sequences were counted. Every marker in
     * {@link #markers} whose ID equals a flagged key is then marked via
     * {@code setHighlyConservedAaSequence(true)}. In verbose mode a tab separated
     * table with one row per key is printed to STDOUT.
     */
    void analyzeSequenceConservation() {
        if (this.verbose) {
            Timer.showStdErr("Sequence conservation analysis.\n\tAA sequence length  : 1\n\tMin AA count        : 30\n\tMin AA conservation : 0.99");
        }

        // Process keys in sorted order so the report is stable
        List<String> keys = new ArrayList<String>(this.countAaSequenceByType.keySet());
        Collections.sort(keys);

        // Table header: one column per amino acid (the builder then appends a copy of itself)
        StringBuilder title = new StringBuilder();
        for (char aa : GprSeq.AMINO_ACIDS) {
            title.append(aa + "\t");
        }
        title.append("\t" + title);
        if (this.verbose) {
            System.out.println("Amino acid regions:\n\tTotal\tMax count\tAvg len\tConservation\tCatergory\tControlled Vocabulary\t" + title + "\tOther AA sequences:");
        }

        for (String key : keys) {
            long seqLen = 0L;
            long totalSeqs = 0L;
            long maxCount = 0L;
            CountByType cbt = this.countAaSequenceByType.get(key);
            long total = cbt.sum();
            boolean highlyConservedAaSequence = false;
            StringBuilder sb = new StringBuilder();

            // Single amino acid sequences: one column each
            for (char aa : GprSeq.AMINO_ACIDS) {
                long count = cbt.get("" + aa);
                if (count > 0L) {
                    seqLen += count;
                    totalSeqs += count;
                    maxCount = Math.max(maxCount, count);
                    sb.append(count);
                    double perc = (double) count / (double) total;
                    if (perc > HIGHLY_CONSERVED_AA_PERCENT && total >= HIGHLY_CONSERVED_AA_COUNT) {
                        highlyConservedAaSequence = true;
                    }
                }
                sb.append("\t");
            }

            // Longer sequences: appended as 'sequence:count' pairs
            for (String aas : cbt.keySet()) {
                if (aas.length() <= 1) {
                    continue;
                }
                long count = cbt.get(aas);
                double perc = (double) count / (double) total;
                seqLen += (long) aas.length() * count;
                totalSeqs += count;
                maxCount = Math.max(maxCount, count);
                // Plain append: the sequence is data and must not be interpreted as a format string
                sb.append('\t').append(aas).append(':').append(count);
                if (perc > HIGHLY_CONSERVED_AA_PERCENT && total >= HIGHLY_CONSERVED_AA_COUNT) {
                    highlyConservedAaSequence = true;
                }
            }

            // A category holding only 1-letter sequences that are not in GprSeq.AMINO_ACIDS
            // leaves totalSeqs at zero; avoid the division by zero in that case.
            long avgLen = totalSeqs > 0L ? seqLen / totalSeqs : 0L;
            if (this.verbose) {
                System.out.println("\t" + total + "\t" + maxCount + "\t" + avgLen + "\t" + (highlyConservedAaSequence ? "High" : "") + "\t" + key + "\t" + sb);
            }
            if (!highlyConservedAaSequence) {
                continue;
            }

            // Flag every marker that belongs to this key
            int marked = 0;
            for (Marker m : this.markers) {
                if (!m.getId().equals(key)) {
                    continue;
                }
                ((NextProt) m).setHighlyConservedAaSequence(true);
                ++marked;
            }
            if (this.verbose) {
                Timer.showStdErr("NextProt " + marked + " markers type '" + key + "' marked as highly conserved AA sequence");
            }
        }
    }

    /**
     * Increments the counter of an amino acid sequence for an annotation type.
     *
     * @param category    annotation category
     * @param contrVoc    controlled vocabulary term
     * @param description annotation description
     * @param sequence2   amino acid sequence to count
     */
    void countAaSequence(String category, String contrVoc, String description, String sequence2) {
        // The counter is created on first use for each annotation key
        this.countAaSequenceByType.getOrCreate(this.key(category, contrVoc, description)).inc(sequence2);
    }

    /**
     * Collects, in document order, {@code node} and all of its descendants that satisfy
     * the given filters. A {@code null} filter matches anything.
     *
     * When {@code attrName} and/or {@code attrValue} is given, a node only matches if it
     * has at least one attribute satisfying both attribute filters (in addition to the
     * node name and node value filters). Only element nodes are descended into.
     *
     * The search is restricted to the subtree rooted at {@code node}. The previous
     * implementation also walked on to the following siblings of an element, which
     * leaked results from outside the subtree and, together with the {@code NodeList}
     * overload (which already visits every sibling), returned nodes more than once
     * whenever two elements were adjacent without a text node between them.
     *
     * @param node      root of the subtree to search (may be {@code null})
     * @param nodeName  required node name, or {@code null}
     * @param nodeValue required node value (new lines replaced by spaces, trimmed), or {@code null}
     * @param attrName  required attribute name, or {@code null}
     * @param attrValue required attribute value, or {@code null}
     * @return matching nodes, never {@code null}
     */
    ArrayList<Node> findNodes(Node node, String nodeName, String nodeValue, String attrName, String attrValue) {
        ArrayList<Node> results = new ArrayList<Node>();
        if (node == null) {
            return results;
        }

        String value = node.getNodeValue();
        if (value != null) {
            value = value.replace('\n', ' ').trim();
        }

        // Node name / value filters
        boolean matches = (nodeName == null || nodeName.equals(node.getNodeName()))
                && (nodeValue == null || nodeValue.equals(value));

        // Attribute filters: at least one attribute must satisfy both of them
        if (matches && (attrName != null || attrValue != null)) {
            matches = false;
            NamedNodeMap attrs = node.getAttributes();
            if (attrs != null) {
                for (int i = 0; i < attrs.getLength() && !matches; ++i) {
                    Node attr = attrs.item(i);
                    matches = (attrName == null || attrName.equals(attr.getNodeName()))
                            && (attrValue == null || attrValue.equals(attr.getNodeValue()));
                }
            }
        }

        if (matches) {
            results.add(node);
        }

        // Only elements are searched recursively; text, CDATA and other nodes are leaves here
        if (node.getNodeType() == Node.ELEMENT_NODE) {
            results.addAll(this.findNodes(node.getChildNodes(), nodeName, nodeValue, attrName, attrValue));
        }
        return results;
    }

    /**
     * Applies the single-node search to every item of a node list and concatenates
     * the results in list order.
     *
     * @param nodeList  nodes to search
     * @param nodeName  required node name, or {@code null}
     * @param nodeValue required node value, or {@code null}
     * @param attrName  required attribute name, or {@code null}
     * @param attrValue required attribute value, or {@code null}
     * @return all matching nodes
     */
    List<Node> findNodes(NodeList nodeList, String nodeName, String nodeValue, String attrName, String attrValue) {
        ArrayList<Node> found = new ArrayList<Node>();
        int idx = 0;
        while (idx < nodeList.getLength()) {
            found.addAll(this.findNodes(nodeList.item(idx), nodeName, nodeValue, attrName, attrValue));
            idx++;
        }
        return found;
    }

    /**
     * Returns the first node found by {@code findNodes} for the given filters.
     *
     * @return the first match, or {@code null} when nothing matches
     */
    Node findOneNode(Node node, String nodeName, String nodeValue, String attrName, String attrValue) {
        ArrayList<Node> matches = this.findNodes(node, nodeName, nodeValue, attrName, attrValue);
        return matches.isEmpty() ? null : matches.get(0);
    }

    /**
     * Stores the text of every 'sequence' node found from {@code node} in
     * {@link #sequenceByUniqueName}, keyed by the 'uniqueName' attribute of the
     * sequence node's parent.
     *
     * @param node node to search from
     */
    void findSequences(Node node) {
        for (Node seqNode : this.findNodes(node, NODE_NAME_SEQUENCE, null, null, null)) {
            String uniqueName = this.getAttribute(seqNode.getParentNode(), ATTR_NAME_UNIQUE_NAME);
            this.sequenceByUniqueName.put(uniqueName, this.getText(seqNode));
        }
    }

    /**
     * Records the accession of every 'transcript' node having a database attribute
     * equal to 'Ensembl', keyed by the 'uniqueName' attribute of the transcript
     * node's parent, in {@link #trIdByUniqueName}.
     *
     * @param node node to search from
     * @return {@code true} if at least one transcript was recorded
     */
    boolean findTrIds(Node node) {
        ArrayList<Node> transcriptNodes = this.findNodes(node, NODE_NAME_TRANSCRIPT, null, ATTR_NAME_DATABASE, ATTR_VALUE_ENSEMBL);
        for (Node transcriptNode : transcriptNodes) {
            String accession = this.getAttribute(transcriptNode, ATTR_NAME_ACCESSION);
            String uniqueName = this.getAttribute(transcriptNode.getParentNode(), ATTR_NAME_UNIQUE_NAME);
            this.trIdByUniqueName.put(uniqueName, accession);
        }
        // Every matching node adds an entry, so "anything added" equals "anything found"
        return !transcriptNodes.isEmpty();
    }

    /**
     * Reads an attribute value from a node.
     *
     * @param node     node to read from (may be {@code null})
     * @param attrName attribute name
     * @return the attribute value, or {@code null} if the node is {@code null},
     *         has no attribute map, or lacks the attribute
     */
    String getAttribute(Node node, String attrName) {
        NamedNodeMap attributes = (node == null) ? null : node.getAttributes();
        Node attribute = (attributes == null) ? null : attributes.getNamedItem(attrName);
        return (attribute == null) ? null : attribute.getNodeValue();
    }

    /**
     * Finds the accession of the first 'gene' node that has a database attribute
     * equal to 'Ensembl'.
     *
     * @param node       node to search from
     * @param uniqueName not used by this method
     * @return the accession attribute, or {@code null} if no such gene node exists
     */
    String getGeneId(Node node, String uniqueName) {
        return this.getAttribute(
                this.findOneNode(node, NODE_NAME_GENE, null, ATTR_NAME_DATABASE, ATTR_VALUE_ENSEMBL),
                ATTR_NAME_ACCESSION);
    }

    /**
     * Returns the text content of a node with new lines replaced by spaces and
     * surrounding whitespace removed.
     *
     * @param n node to read (may be {@code null})
     * @return the cleaned-up text, or {@code null} when {@code n} is {@code null}
     */
    String getText(Node n) {
        return (n == null) ? null : n.getTextContent().replace('\n', ' ').trim();
    }

    /**
     * Builds the identifier used for an annotation type: {@code category:description},
     * or just {@code category} when no description is available. When the description
     * is {@code null} or empty, the controlled vocabulary term is used instead. Both
     * parts are sanitized with {@code vcfSafe}.
     *
     * {@code null} arguments are treated as empty strings. A missing 'category'
     * attribute is not in the black list and therefore can reach this method as
     * {@code null}, which previously caused a {@code NullPointerException}.
     *
     * @param category    annotation category (may be {@code null})
     * @param contrVoc    controlled vocabulary term (may be {@code null})
     * @param description annotation description (may be {@code null})
     * @return the annotation key
     */
    String key(String category, String contrVoc, String description) {
        String safeCategory = this.vcfSafe(category == null ? "" : category);

        // Fall back to the controlled vocabulary term when there is no description
        String descr = (description == null || description.isEmpty()) ? contrVoc : description;
        String safeDescr = this.vcfSafe(descr == null ? "" : descr);

        if (safeDescr.isEmpty()) {
            return safeCategory;
        }
        return safeCategory + ":" + safeDescr;
    }

    /**
     * Translates a numeric DOM constant into the name of the corresponding
     * {@link Node} constant.
     *
     * @param type numeric value, typically obtained from {@code Node.getNodeType()}
     * @return the constant's name
     * @throws RuntimeException if the value is not one of the handled constants
     */
    String nodeType(short type) {
        switch (type) {
            case Node.ELEMENT_NODE:
                return "ELEMENT_NODE";
            case Node.ATTRIBUTE_NODE:
                return "ATTRIBUTE_NODE";
            case Node.TEXT_NODE:
                return "TEXT_NODE";
            case Node.CDATA_SECTION_NODE:
                return "CDATA_SECTION_NODE";
            case Node.ENTITY_REFERENCE_NODE:
                return "ENTITY_REFERENCE_NODE";
            case Node.ENTITY_NODE:
                return "ENTITY_NODE";
            case Node.PROCESSING_INSTRUCTION_NODE:
                return "PROCESSING_INSTRUCTION_NODE";
            case Node.COMMENT_NODE:
                return "COMMENT_NODE";
            case Node.DOCUMENT_NODE:
                return "DOCUMENT_NODE";
            case Node.DOCUMENT_TYPE_NODE:
                return "DOCUMENT_TYPE_NODE";
            case Node.DOCUMENT_FRAGMENT_NODE:
                return "DOCUMENT_FRAGMENT_NODE";
            case Node.NOTATION_NODE:
                return "NOTATION_NODE";
            // Value 16: a document-position constant rather than a node type, kept as in the original
            case Node.DOCUMENT_POSITION_CONTAINED_BY:
                return "DOCUMENT_POSITION_CONTAINED_BY";
            default:
                throw new RuntimeException("Unknown");
        }
    }

    /**
     * Parses one NextProt XML file (optionally gzip compressed, detected by a
     * {@code .gz} file name suffix) and processes every 'protein' node in it.
     *
     * The input streams opened for compressed files are always closed, including
     * when parsing fails (they used to be left open).
     *
     * @param xmlFileName path of the XML file
     * @throws RuntimeException wrapping any error raised while reading or processing the file
     */
    void parse(String xmlFileName) {
        try {
            if (this.verbose) {
                Timer.showStdErr("Reading file:" + xmlFileName);
            }
            File xmlFile = new File(xmlFileName);
            Document doc;
            if (xmlFileName.endsWith(".gz")) {
                FileInputStream fileIn = new FileInputStream(xmlFile);
                try {
                    GZIPInputStream gzIn = new GZIPInputStream(fileIn);
                    try {
                        doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(gzIn);
                    } finally {
                        gzIn.close();
                    }
                } finally {
                    // Also covers the case where the gzip header could not be read
                    fileIn.close();
                }
            } else {
                doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(xmlFile);
            }

            if (this.verbose) {
                Timer.showStdErr("Normalizing XML document");
            }
            doc.getDocumentElement().normalize();

            if (this.verbose) {
                Timer.showStdErr("Parsing XML data.");
            }
            List<Node> nodeList = this.findNodes(doc.getChildNodes(), NODE_NAME_PROTEIN, null, null, null);
            if (this.verbose) {
                Timer.showStdErr("Found " + nodeList.size() + " protein nodes");
            }
            for (Node node : nodeList) {
                this.parseProteinNode(node);
            }
        }
        catch (Exception e) {
            // Keep the cause and say which file failed
            throw new RuntimeException("Error parsing NextProt XML file '" + xmlFileName + "'", e);
        }
    }

    /**
     * Parses a single 'annotation' node: for each 'position' node found in it, maps
     * the annotated amino acid range to the transcript and, when the mapping is
     * consistent, adds a {@link NextProt} marker and counts the annotated sequence.
     *
     * The description is cut at its first ';' (unless that is the first character);
     * the controlled vocabulary text is split at its first ';' into two parts.
     * Positions reaching beyond the end of the isoform sequence are skipped (they
     * used to raise a {@code StringIndexOutOfBoundsException}).
     *
     * @param ann      annotation node
     * @param geneId   gene ID, only used in debug output
     * @param category category of the enclosing annotation list
     */
    void parseAnnotation(Node ann, String geneId, String category) {
        // Description: keep the part before the first ';'
        Node descr = this.findOneNode(ann, NODE_NAME_DESCRIPTION, null, null, null);
        String description = this.getText(descr);
        if (description == null) {
            description = "";
        } else {
            int semicolon = description.indexOf(';');
            if (semicolon > 0) {
                description = description.substring(0, semicolon);
            }
        }

        // Controlled vocabulary: 'term;rest'
        Node cv = this.findOneNode(ann, NODE_NAME_CVNAME, null, null, null);
        String contrVoc = this.getText(cv);
        if (contrVoc == null) {
            contrVoc = "";
        }
        String contrVoc2 = "";
        String[] cvs = contrVoc.split(";", 2);
        if (cvs.length > 1) {
            contrVoc = cvs[0];
            contrVoc2 = cvs[1];
        }

        ArrayList<Node> posNodes = this.findNodes(ann, NODE_NAME_POSITION, null, null, null);
        for (Node pos : posNodes) {
            // The 'first' / 'last' attributes are converted to zero-based indexes
            String first = this.getAttribute(pos, ATTR_NAME_FIRST);
            String last2 = this.getAttribute(pos, ATTR_NAME_LAST);
            int aaStart = Gpr.parseIntSafe(first) - 1;
            int aaEnd = Gpr.parseIntSafe(last2) - 1;
            int len = aaEnd - aaStart + 1;

            // The isoform reference is an attribute of the position's grandparent node
            Node isoAnn = pos.getParentNode().getParentNode();
            String isoformRef = this.getAttribute(isoAnn, ATTR_NAME_ISOFORM_REF);
            String sequence2 = this.sequenceByUniqueName.get(isoformRef);

            String subSeq = "";
            if (sequence2 != null && aaStart >= 0 && aaEnd >= aaStart) {
                if (aaEnd >= sequence2.length()) {
                    // Annotation does not fit in the sequence: nothing can be mapped
                    if (this.verbose) {
                        Timer.showStdErr("WARNING: Annotation position out of sequence range: \tUniqueName: " + isoformRef + "\tFirst: " + first + "\tLast: " + last2 + "\tSequence length: " + sequence2.length());
                    }
                    continue;
                }
                subSeq = sequence2.substring(aaStart, aaEnd + 1);
            }

            TranscriptData trData = this.transcriptData(isoformRef, aaStart, aaEnd, sequence2, subSeq);
            if (!trData.ok || len <= 0) {
                continue;
            }
            if (this.debug) {
                System.out.println(geneId + "\t" + isoformRef + "\t" + trData.tr.getId() + "\t" + category + "\t" + description + "\t" + contrVoc + "\t" + contrVoc2 + "\t" + first + "\t" + last2 + "\t" + len + "\t" + trData.chrName + "\t" + trData.chrPosStart + "\t" + trData.chrPosEnd + "\t" + subSeq + "\t" + trData.codon + "\t" + trData.aa);
            }

            // Create the marker and update sequence statistics
            String id = this.key(category, contrVoc, description);
            NextProt nextProt = new NextProt(trData.tr, trData.chrPosStart, trData.chrPosEnd, id);
            if (this.debug) {
                Gpr.debug("Adding NextProt: " + nextProt);
            }
            this.markers.add(nextProt);
            this.countAaSequence(category, contrVoc, description, subSeq);
        }
    }

    /**
     * Parses every 'annotation' node of each 'annotationList' found from
     * {@code node}, skipping lists whose category is in {@link #categoryBlackList}.
     *
     * @param node   node to search from
     * @param geneId gene ID passed on to {@code parseAnnotation}
     */
    void parseAnnotations(Node node, String geneId) {
        for (Node listNode : this.findNodes(node, NODE_NAME_ANNOTATION_LIST, null, null, null)) {
            String category = this.getAttribute(listNode, ATTR_NAME_CATAGORY);
            if (!this.categoryBlackList.contains(category)) {
                for (Node annotation : this.findNodes(listNode, NODE_NAME_ANNOTATION, null, null, null)) {
                    this.parseAnnotation(annotation, geneId, category);
                }
            }
        }
    }

    /**
     * Parses the command line: the first non-option argument is the genome version,
     * the second one is the NextProt XML directory; further non-option arguments are
     * ignored. Any option, or a missing argument, is reported through {@code usage}.
     *
     * @param args command line arguments
     */
    @Override
    public void parseArgs(String[] args) {
        this.args = args;
        if (args.length <= 0) {
            this.usage(null);
        }

        for (String arg : args) {
            if (this.isOpt(arg)) {
                // This command accepts no options
                this.usage("Unknonwn option '" + arg + "'");
            } else if (this.genomeVer == null || this.genomeVer.isEmpty()) {
                this.genomeVer = arg;
            } else if (this.xmlDirName == null || this.xmlDirName.isEmpty()) {
                this.xmlDirName = arg;
            }
        }

        // Both positional arguments are mandatory
        if (this.genomeVer == null || this.genomeVer.isEmpty()) {
            this.usage("Missing genome version");
        }
        if (this.xmlDirName == null || this.xmlDirName.isEmpty()) {
            this.usage("Missing nextProt XML dir");
        }
    }

    /**
     * Processes one 'protein' node. Sequences and annotations are only parsed when
     * the node yields a gene ID and at least one transcript ID.
     *
     * @param node protein node
     */
    void parseProteinNode(Node node) {
        String uniqueName = this.getAttribute(node, ATTR_NAME_UNIQUE_NAME);
        if (this.debug) {
            Timer.showStdErr("Parsing protein node: " + uniqueName);
        }

        String geneId = this.getGeneId(node, uniqueName);
        if (geneId == null) {
            return;
        }
        if (!this.findTrIds(node)) {
            return;
        }
        this.findSequences(node);
        this.parseAnnotations(node, geneId);
    }

    /**
     * Runs the command: loads configuration and database, indexes all transcripts by
     * ID, parses every {@code .xml} / {@code .xml.gz} file listed in the NextProt
     * directory, runs the sequence conservation analysis and saves the markers.
     *
     * @return always {@code true}
     */
    @Override
    public boolean run() {
        this.loadConfig();
        this.loadDb();
        this.genome = this.config.getGenome();
        if (this.verbose) {
            Timer.showStdErr("done");
        }

        // Index transcripts by ID
        for (Gene gene : this.config.getSnpEffectPredictor().getGenome().getGenes()) {
            for (Transcript transcript : gene) {
                this.trById.put(transcript.getId(), transcript);
            }
        }

        if (this.verbose) {
            Timer.showStdErr("Reading NextProt files from directory '" + this.xmlDirName + "'");
        }
        String[] fileNames = new File(this.xmlDirName).list();
        if (fileNames == null) {
            this.fatalError("No XML files found in directory '" + this.xmlDirName + "'");
        } else {
            for (String fileName : fileNames) {
                if (this.verbose) {
                    Timer.showStdErr("\tNextProt file '" + fileName + "'");
                }
                boolean isXml = fileName.endsWith(".xml.gz") || fileName.endsWith(".xml");
                if (isXml) {
                    this.parse(this.xmlDirName + "/" + fileName);
                }
            }
        }

        if (this.verbose) {
            Timer.showStdErr("Proteing sequences:\n\tMatch       : " + this.proteinOk.size() + "\n\tDifferences : " + this.proteinDifferences.size() + "\n\tAA errros   : " + this.aaErrors);
        }

        this.analyzeSequenceConservation();
        this.save();

        if (this.verbose) {
            Timer.showStdErr("Done!");
        }
        return true;
    }

    /**
     * Saves the database to {@code nextProt.bin} in the data directory given by the
     * configuration. The saved collection holds the genome, then each distinct
     * chromosome referenced by a marker, then the markers themselves.
     */
    void save() {
        String fileName = this.config.getDirDataVersion() + "/nextProt.bin";
        if (this.verbose) {
            Timer.showStdErr("Saving database to file '" + fileName + "'");
        }

        // Distinct chromosomes referenced by the markers
        HashSet<Chromosome> chromosomes = new HashSet<Chromosome>();
        for (Marker marker : this.markers) {
            chromosomes.add(marker.getChromosome());
        }

        Markers toSave = new Markers();
        toSave.add(this.genome);
        for (Chromosome chromosome : chromosomes) {
            toSave.add(chromosome);
        }
        for (Marker marker : this.markers) {
            toSave.add(marker);
        }
        toSave.save(fileName);
    }

    /**
     * Formats a node as {@code name( attr='value', ... ) = 'value'} followed by a
     * new line. The attribute list is omitted when the node has no attribute map,
     * and the value part (including the new line) when the node value is {@code null}.
     *
     * @param node node to format
     * @return human readable representation of the node
     */
    String toString(Node node) {
        StringBuilder out = new StringBuilder(node.getNodeName());

        NamedNodeMap attrs = node.getAttributes();
        if (attrs != null) {
            out.append("( ");
            String separator = "";
            for (int idx = 0; idx < attrs.getLength(); ++idx) {
                Node attr = attrs.item(idx);
                out.append(separator);
                out.append(attr.getNodeName() + "='" + attr.getNodeValue() + "'");
                separator = ", ";
            }
            out.append(" )");
        }

        String rawValue = node.getNodeValue();
        if (rawValue != null) {
            // New lines become spaces and surrounding whitespace is dropped
            out.append(" = '" + rawValue.replace('\n', ' ').trim() + "'\n");
        }
        return out.toString();
    }

    /**
     * Maps an amino acid range of a NextProt isoform to chromosome coordinates of the
     * corresponding transcript and validates the mapping.
     *
     * The returned object has {@code ok == true} only if all of the following hold:
     * the isoform maps to a known transcript, the transcript's protein sequence
     * (without a trailing '*') equals {@code sequence2}, the range is valid and lies
     * within the coding sequence, and the amino acids translated from the transcript
     * equal {@code subSeq}.
     *
     * Fixes relative to the previous version:
     * <ul>
     *   <li>an amino acid mismatch is always rejected (it used to be accepted
     *       whenever verbose mode was off);</li>
     *   <li>mismatches are counted in {@link #aaErrors}, which is reported by
     *       {@code run()} but was never incremented;</li>
     *   <li>a {@code null} {@code sequence2} no longer raises a
     *       {@code NullPointerException} while printing the warning;</li>
     *   <li>ranges reaching beyond the coding sequence are rejected instead of
     *       raising an index-out-of-bounds exception.</li>
     * </ul>
     *
     * @param isoformRef NextProt isoform unique name
     * @param aaStart    zero-based index of the first amino acid
     * @param aaEnd      zero-based index of the last amino acid (inclusive)
     * @param sequence2  full NextProt isoform sequence (may be {@code null})
     * @param subSeq     NextProt amino acids for the range
     * @return mapping data; never {@code null}
     */
    TranscriptData transcriptData(String isoformRef, int aaStart, int aaEnd, String sequence2, String subSeq) {
        TranscriptData trData = new TranscriptData();

        String trId = this.trIdByUniqueName.get(isoformRef);
        Transcript tr = (trId == null) ? null : this.trById.get(trId);
        if (tr == null) {
            return trData;
        }
        trData.tr = tr;

        // Compare protein sequences, ignoring a trailing stop codon symbol
        String protein = tr.protein();
        if (!protein.isEmpty() && protein.charAt(protein.length() - 1) == '*') {
            protein = protein.substring(0, protein.length() - 1);
        }
        if (!protein.equals(sequence2)) {
            // Warn only once per transcript
            if (!this.proteinDifferences.contains(trId) && this.verbose) {
                int nextProtLen = (sequence2 == null) ? 0 : sequence2.length();
                Timer.showStdErr("WARNING: Protein sequences differ: \tUniqueName" + isoformRef + "\tEnsembl ID: " + trId + "\n\tEnsembl  (" + protein.length() + "): " + protein + "\n\tNextProt (" + nextProtLen + "): " + sequence2 + "\n");
            }
            this.proteinDifferences.add(trId);
            return trData;
        }
        this.proteinOk.add(trId);

        if (aaStart < 0 || aaEnd < aaStart) {
            return trData;
        }

        // Coding sequence offsets of the first and last base of the range
        int[] cdsBase2Pos = tr.baseNumberCds2Pos();
        String cds = tr.cds();
        int codonStart = aaStart * 3;
        int codonEnd = (aaEnd + 1) * 3 - 1;
        if (codonEnd >= cdsBase2Pos.length || codonEnd >= cds.length()) {
            ++this.aaErrors;
            if (this.verbose) {
                Timer.showStdErr("WARNING: AA range out of coding sequence: \tUniqueName: " + isoformRef + "\tEnsembl ID: " + trId + "\tAA range: [" + aaStart + ", " + aaEnd + "]\n");
            }
            return trData;
        }

        // Chromosome coordinates: start <-> end are swapped on the minus strand
        if (tr.isStrandPlus()) {
            trData.chrPosStart = cdsBase2Pos[codonStart];
            trData.chrPosEnd = cdsBase2Pos[codonEnd];
        } else {
            trData.chrPosStart = cdsBase2Pos[codonEnd];
            trData.chrPosEnd = cdsBase2Pos[codonStart];
        }
        trData.chrName = tr.getChromosomeName();

        // Sanity check: translated codons must match the NextProt amino acids
        trData.codon = cds.substring(codonStart, codonEnd + 1);
        trData.aa = CodonTables.getInstance().aa(trData.codon, this.genome, trData.chrName);
        if (subSeq.equals(trData.aa)) {
            trData.ok = true;
        } else {
            ++this.aaErrors;
            if (this.verbose) {
                Timer.showStdErr("WARNING: AA differ: \tUniqueName" + isoformRef + "\tEnsembl ID: " + trId + "\tEnsembl  AA: " + trData.aa + "\tNextProt AA:" + subSeq + "\n");
            }
        }
        return trData;
    }

    /**
     * Prints an optional error message followed by the version and usage lines to
     * STDERR, then terminates the JVM with exit code -1.
     *
     * @param message error message, or {@code null} for none
     */
    @Override
    public void usage(String message) {
        if (message != null) {
            System.err.println("Error        :\t" + message);
        }
        String[] helpLines = {
            "snpEff version SnpEff 4.1i (build 2015-08-14), by Pablo Cingolani",
            "Usage: snpEff buildNextProt [options] genome_version nextProt_XML_dir"
        };
        for (String helpLine : helpLines) {
            System.err.println(helpLine);
        }
        System.exit(-1);
    }

    /**
     * Trims a string and replaces every run of commas, semicolons, equal signs,
     * spaces and tabs with a single underscore.
     *
     * @param str string to sanitize (must not be {@code null})
     * @return the sanitized string
     */
    String vcfSafe(String str) {
        String trimmed = str.trim();
        return trimmed.replaceAll("(,|;|=| |\t)+", "_");
    }
}

