/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.io.trityper.converters;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Pattern;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.WGAFileMatrixGenotype;
import umcg.genetica.io.trityper.util.BaseAnnot;
import umcg.genetica.io.trityper.util.ChrAnnotation;
import umcg.genetica.text.Strings;

public class VCFToTriTyper {
    Integer[] colToIndId = null;
    private HashMap<String, Integer> individualMap;
    private ArrayList<String> individuals;
    private HashMap<String, Integer> snpMap;
    private HashMap<String, Byte> snpChrMap;
    private HashMap<String, Integer> snpChrPosMap;
    private ArrayList<String> snpList;
    private int multiallelicSNPsExcluded;
    private int snpctr;
    private int finalNrInds;
    private Pattern snppattern;
    private static final Pattern zero = Pattern.compile("0");
    private static final Pattern one = Pattern.compile("1");

    public void parse(String dir, String oudir) throws IOException {
        this.parse(dir, oudir, null);
    }

    public void parse(String dir, String outdir, String snpPatternString) throws IOException {
        if (snpPatternString == null) {
            this.snppattern = null;
            System.out.println("All variants are assumed to be SNPs");
        } else {
            this.snppattern = Pattern.compile(snpPatternString);
            System.out.println("Variants are filtered for SNPs using this pattern on the INFO column: " + snpPatternString);
        }
        if (!Gpio.exists(dir)) {
            throw new IOException("Error: could not find dir: " + dir);
        }
        if (!outdir.endsWith("/")) {
            outdir = outdir + "/";
        }
        Gpio.createDir(outdir);
        String[] files = Gpio.getListOfFiles(dir);
        ArrayList<String> finalFiles = new ArrayList<String>();
        for (String file : files) {
            if (!file.endsWith(".vcf") && !file.endsWith(".vcf.gz")) continue;
            finalFiles.add(file);
        }
        files = finalFiles.toArray(new String[0]);
        System.out.println("Found " + files.length + " vcf files");
        if (files.length == 0) {
            System.exit(0);
        }
        for (String file : files) {
            this.parseFile(dir, file, outdir);
        }
    }

    private void parseFile(String dir, String file, String outdir) throws IOException {
        String[] fileNameElems = Strings.dot.split(file);
        String chrNum = "";
        for (String f : fileNameElems) {
            String lowerCaseF = f.toLowerCase();
            if (!lowerCaseF.contains("chr")) continue;
            chrNum = "Chr" + lowerCaseF.replaceAll("[^\\d.]", "");
        }
        file = dir + "/" + file;
        outdir = outdir + chrNum + "/";
        Gpio.createDir(outdir);
        System.out.println("Will write to " + outdir);
        this.snpMap = new HashMap();
        this.snpChrMap = new HashMap();
        this.snpChrPosMap = new HashMap();
        this.snpctr = 0;
        int filecounter = 0;
        HashMap<String, Integer> individualsInFilesCounter = new HashMap<String, Integer>();
        HashSet<String> uniqueIndividuals = new HashSet<String>();
        System.out.println("Parsing file: " + file);
        TextFile tf = new TextFile(file, false);
        boolean indcounter = false;
        String[] elems = tf.readLineElems(TextFile.tab);
        while (elems != null) {
            if (!elems[0].startsWith("##") && elems[0].startsWith("#CHROM")) {
                if (elems.length <= 9) break;
                for (int i = 9; i < elems.length; ++i) {
                    String sample = elems[i];
                    Integer numFilesPresent = (Integer)individualsInFilesCounter.get(sample);
                    if (numFilesPresent == null) {
                        numFilesPresent = 0;
                    }
                    Integer n = numFilesPresent;
                    Integer n2 = numFilesPresent = Integer.valueOf(numFilesPresent + 1);
                    individualsInFilesCounter.put(sample, numFilesPresent);
                    uniqueIndividuals.add(sample);
                }
                break;
            }
            elems = tf.readLineElems(TextFile.tab);
        }
        tf.close();
        String[] uniqueIndividualsArray = uniqueIndividuals.toArray(new String[0]);
        this.individualMap = new HashMap();
        this.individuals = new ArrayList();
        int indIdCtr = 0;
        for (String individual : uniqueIndividualsArray) {
            Integer ctr = (Integer)individualsInFilesCounter.get(individual);
            this.individualMap.put(individual, indIdCtr);
            this.individuals.add(individual);
            ++indIdCtr;
        }
        System.out.println("Total number of detected individuals: " + this.individuals.size());
        System.out.println("Now writing individuals to output directory");
        TextFile indOut = new TextFile(outdir + "Individuals.txt", true);
        TextFile indPhenoOut = new TextFile(outdir + "PhenotypeInformation.txt", true);
        for (String ind : this.individuals) {
            indOut.writeln(ind);
            indPhenoOut.writeln(ind + "\tunknown\tinclude\tunknown");
        }
        indOut.close();
        indPhenoOut.close();
        this.multiallelicSNPsExcluded = 0;
        this.snpList = new ArrayList();
        System.out.println("Parsing file: " + file);
        int lnctr = 0;
        tf = new TextFile(file, false);
        this.colToIndId = null;
        elems = tf.readLineElemsReturnObjects(TextFile.tab);
        boolean nrSNPs = false;
        while (elems != null) {
            if (!elems[0].startsWith("##")) {
                if (elems[0].startsWith("#CHROM")) {
                    this.parseHeaderLine(elems);
                } else {
                    this.parseVCFSNPLine(elems, true);
                }
            }
            elems = tf.readLineElemsReturnObjects(TextFile.tab);
            if (++lnctr % 500000 != 0) continue;
            System.out.println("Parsed\t" + this.snpList.size() + "\tsnps.");
        }
        tf.close();
        ++filecounter;
        System.out.println(this.snpMap.size() + "\tsnps detected");
        System.out.println(this.multiallelicSNPsExcluded + "\tmulti allelic SNPs excluded.");
        System.out.println("Final totals: ");
        System.out.println(this.snpMap.size() + "\tsnps detected");
        System.out.println(this.multiallelicSNPsExcluded + "\tmulti allelic SNPs excluded.");
        System.out.println("Now writing snps to output directory!");
        String[] availableSNPs = this.snpList.toArray(new String[this.snpList.size()]);
        TextFile snpout = new TextFile(outdir + "SNPs.txt", true);
        TextFile snpmapout = new TextFile(outdir + "SNPMappings.txt", true);
        for (String snp : availableSNPs) {
            snpout.writeln(snp);
            snpmapout.writeln(this.snpChrMap.get(snp) + "\t" + this.snpChrPosMap.get(snp) + "\t" + snp);
        }
        snpout.close();
        snpmapout.close();
        this.finalNrInds = this.individuals.size();
        String outfilename = outdir + "GenotypeMatrix.dat";
        WGAFileMatrixGenotype genotypefile = new WGAFileMatrixGenotype(this.snpctr, this.individuals.size(), new File(outfilename), false);
        filecounter = 0;
        ProgressBar pb = new ProgressBar(lnctr, "writing genotypes from file: " + file);
        tf = new TextFile(file, false);
        elems = tf.readLineElems(TextFile.tab);
        this.colToIndId = null;
        lnctr = 0;
        this.snpctr = 0;
        while (elems != null) {
            if (!elems[0].startsWith("##")) {
                if (elems[0].startsWith("#CHROM")) {
                    this.parseHeaderLine(elems);
                } else {
                    Pair<byte[], byte[]> p;
                    Integer snpid;
                    String snp;
                    snp = elems[2];
                    if (snp.equals(".")) {
                        snp = elems[0] + "_" + elems[1];
                    }
                    if ((snpid = this.snpMap.get(snp)) != null && (p = this.parseVCFSNPLine(elems, false)) != null) {
                        genotypefile.setAlleles(snpid, p.getLeft(), p.getRight());
                        ++this.snpctr;
                    }
                }
            }
            elems = tf.readLineElems(TextFile.tab);
            pb.set(lnctr);
            ++lnctr;
        }
        pb.close();
        ++filecounter;
        tf.close();
        System.out.println("");
        genotypefile.close();
    }

    private Pair<byte[], byte[]> parseVCFSNPLine(String[] elems, boolean inventory) {
        boolean isSNP;
        String info = elems[7];
        String[] infoelems = Strings.semicolon.split(info);
        if (this.snppattern == null) {
            isSNP = true;
        } else {
            isSNP = false;
            for (String infoelem : infoelems) {
                if (!this.snppattern.matcher(infoelem).matches()) continue;
                isSNP = true;
            }
        }
        if (isSNP) {
            String snp = elems[2];
            if (Strings.dot.matcher(snp).matches()) {
                snp = elems[0] + "_" + elems[1];
            }
            String ref = elems[3];
            String alt = elems[4];
            if (Strings.comma.split(alt).length == 1 && Strings.comma.split(ref).length == 1) {
                byte refb = BaseAnnot.toByte(elems[3]);
                byte altb = BaseAnnot.toByte(elems[4]);
                if (refb == 0 || altb == 0) {
                    System.err.println("WARNING: could not properly parse reference or alternative allele for snp\t" + snp + "\t" + elems[3] + "-" + elems[4]);
                } else {
                    if (!inventory && this.snpMap.containsKey(snp)) {
                        byte[] allele1 = new byte[this.finalNrInds];
                        byte[] allele2 = new byte[this.finalNrInds];
                        for (int i = 9; i < elems.length; ++i) {
                            Integer indId = this.colToIndId[i];
                            if (indId == null) continue;
                            String[] gtElems = Strings.colon.split(elems[i]);
                            String[] genotypes = Strings.pipe.split(gtElems[0]);
                            if (genotypes.length == 1) {
                                genotypes = Strings.forwardslash.split(gtElems[0]);
                            }
                            if (genotypes.length == 1 || genotypes.length > 2) {
                                System.err.println("WARNING: genotype could not be parsed for sample " + this.individuals.get(indId) + "\t" + gtElems[0]);
                                continue;
                            }
                            if (zero.matcher(genotypes[0]).matches()) {
                                allele1[indId.intValue()] = refb;
                            } else if (one.matcher(genotypes[0]).matches()) {
                                allele1[indId.intValue()] = altb;
                            } else if (Strings.dot.matcher(genotypes[0]).matches()) {
                                allele1[indId.intValue()] = 0;
                            } else {
                                System.err.println("Could not parse allele1 of genotype for sample " + this.individuals.get(indId) + "\t" + gtElems[0]);
                            }
                            if (zero.matcher(genotypes[1]).matches()) {
                                allele2[indId.intValue()] = refb;
                                continue;
                            }
                            if (one.matcher(genotypes[1]).matches()) {
                                allele2[indId.intValue()] = altb;
                                continue;
                            }
                            if (Strings.dot.matcher(genotypes[1]).matches()) {
                                allele2[indId.intValue()] = 0;
                                continue;
                            }
                            System.err.println("Could not parse allele2 of genotype for sample " + this.individuals.get(indId) + "\t" + gtElems[0]);
                        }
                        return new Pair<byte[], byte[]>(allele1, allele2);
                    }
                    if (inventory) {
                        if (this.snpMap.containsKey(snp)) {
                            System.err.println("WARNING: " + snp + " already parsed?\n" + Strings.concat(elems, Strings.tab));
                        } else {
                            Byte chr = ChrAnnotation.parseChr(elems[0]);
                            Integer pos = Integer.parseInt(elems[1]);
                            this.snpList.add(snp);
                            this.snpMap.put(snp, this.snpctr);
                            this.snpChrMap.put(snp, chr);
                            this.snpChrPosMap.put(snp, pos);
                            ++this.snpctr;
                        }
                    }
                }
            } else {
                ++this.multiallelicSNPsExcluded;
                System.out.println("SNP " + snp + " is multi-allelic, therefore exlcuding it! " + ref + "\t" + alt);
            }
        }
        return null;
    }

    private void parseHeaderLine(String[] elems) {
        this.colToIndId = new Integer[elems.length];
        for (int i = 9; i < elems.length; ++i) {
            Integer indId;
            String sample = elems[i];
            this.colToIndId[i] = indId = this.individualMap.get(sample);
        }
    }
}

