/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.io.trityper.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGenotypeData;
import umcg.genetica.io.trityper.WGAFileMatrixGenotype;
import umcg.genetica.io.trityper.util.BaseAnnot;
import umcg.genetica.io.trityper.util.ChrAnnotation;
import umcg.genetica.io.trityper.util.CompareAllelicDirections;

public class TriTyperGenotypeDataMerger {
    /**
     * Command-line entry point.
     *
     * <p>Merges two hard-coded dataset directories on their common samples by calling
     * {@code mergeDatasetsOnCommonSamples} and sends the result to a hard-coded output
     * directory. The {@code args} array is not read. Any exception raised along the way
     * is reported as a stack trace rather than propagated.
     *
     * @param args command-line arguments (ignored)
     */
    public static void main(String[] args) {
        try {
            TriTyperGenotypeDataMerger tool = new TriTyperGenotypeDataMerger();
            String[] sourceDirs = new String[2];
            sourceDirs[0] = "D:\\UMCG\\SAT-VAT-Liver-Muscle-ImputeTriTyper\\Liver\\LiverCyto\\";
            sourceDirs[1] = "D:\\UMCG\\SAT-VAT-Liver-Muscle-ImputeTriTyper\\Liver\\LiverCyto2\\";
            String targetDir = "/Volumes/iSnackHD/Data/GeneticalGenomicsDatasets/SatVatLiverMuscle/LiverOmni/CytoAndOmniSampleMerge/";
            tool.mergeDatasetsOnCommonSamples(sourceDirs, targetDir);
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Combines two TriTyper datasets into a single dataset that holds the individuals of
     * both inputs (dataset 1 first, then dataset 2) and the SNPs that occur in both.
     *
     * <p>The method writes {@code PhenotypeInformation.txt}, {@code Individuals.txt},
     * {@code SNPs.txt} and {@code GenotypeMatrix.dat} into {@code outputDir}. For every
     * shared SNP the alleles seen in dataset 2 are compared with those seen in dataset 1;
     * when the two datasets together show more than two alleles, the dataset 2 alleles are
     * replaced by their A/T, C/G complement. If more than two alleles remain after that,
     * the SNP is reported on standard output and its genotypes are not written to the
     * matrix (the SNP is still listed in {@code SNPs.txt}).
     *
     * <p>If an individual has no sex annotation the method prints a message and terminates
     * the JVM through {@code System.exit(0)}.
     *
     * @param baseDir1  directory of the first TriTyper dataset
     * @param baseDir2  directory of the second TriTyper dataset
     * @param outputDir directory that receives the combined dataset; created if needed
     * @param snps      path of a file listing the SNPs to confine the merge to, or
     *                  {@code null} to merge every shared SNP
     * @throws IOException if reading an input or writing an output fails
     */
    public void combinePrioritizerDatasetsMergeCommonSNPs(String baseDir1, String baseDir2, String outputDir, String snps) throws IOException {
        System.out.println("TriTyper Dataset Combiner");
        System.out.println("\n\n");
        System.out.println("Starting to combine dataset '" + baseDir1 + "' and dataset '" + baseDir2 + "'");
        System.out.println("The output directory will be placed in '" + outputDir + "'");
        System.out.println("");
        outputDir = Gpio.formatAsDirectory(outputDir);
        Gpio.createDir(outputDir);

        // Optional confinement list; an empty set means "do not filter".
        HashSet<String> hashSNPsConfine = new HashSet<String>();
        if (snps != null) {
            System.out.println("Loading snp file from " + snps);
            TextFile snpfile = new TextFile(snps, false);
            try {
                hashSNPsConfine.addAll(Arrays.asList(snpfile.readAsArray()));
            } finally {
                snpfile.close();
            }
            System.out.println("Will merge at most " + hashSNPsConfine.size() + " snps.");
        }

        System.out.println("\nLoading data from dataset 1:");
        TriTyperGenotypeData genotypeDataset1 = new TriTyperGenotypeData();
        genotypeDataset1.load(baseDir1);
        System.out.println("");
        System.out.println("\nLoading data from dataset 2:");
        TriTyperGenotypeData genotypeDataset2 = new TriTyperGenotypeData();
        genotypeDataset2.load(baseDir2);
        System.out.println("\n\n");

        // Select the SNPs of dataset 1 that are also present in dataset 2.
        ArrayList<String> vectorSNP = new ArrayList<String>();
        for (String rsName : genotypeDataset1.getSNPs()) {
            Integer snp2Id = genotypeDataset2.getSnpToSNPId().get(rsName);
            // -9 is the "not present" id this code has always tested for; the null guard
            // additionally covers a lookup that reports absent keys as null.
            if (snp2Id == null || snp2Id == -9) {
                continue;
            }
            if (hashSNPsConfine.isEmpty() || hashSNPsConfine.contains(rsName)) {
                vectorSNP.add(rsName);
            }
        }
        System.out.println("Number of unique SNPs or probes that are present in both datasets, and will be included in combined dataset:\t" + vectorSNP.size());
        System.out.println("\n\n");

        System.out.println("\nCombining phenotype information files:");
        ArrayList<String> vectorInd = new ArrayList<String>();
        TextFile phenotypeInformationOut = new TextFile(outputDir + "PhenotypeInformation.txt", true);
        try {
            // Both datasets are handled by the same code so that an unknown case status
            // (null) is written as "unknown" for either of them.
            TriTyperGenotypeData[] bothDatasets = new TriTyperGenotypeData[]{genotypeDataset1, genotypeDataset2};
            for (TriTyperGenotypeData dataset : bothDatasets) {
                String[] individuals = dataset.getIndividuals();
                Boolean[] isFemale = dataset.getIsFemale();
                Boolean[] isCase = dataset.getIsCase();
                Boolean[] isIncluded = dataset.getIsIncluded();
                for (int ind = 0; ind < individuals.length; ++ind) {
                    String individual = individuals[ind];
                    if (isFemale[ind] == null) {
                        System.out.println(individual + " is missing phenotype status. Exiting!");
                        System.exit(0);
                    }
                    String sex = isFemale[ind].booleanValue() ? "female" : "male";
                    String affectionStatus;
                    if (isCase[ind] == null) {
                        affectionStatus = "unknown";
                    } else {
                        affectionStatus = isCase[ind].booleanValue() ? "case" : "control";
                    }
                    String include = isIncluded[ind].booleanValue() ? "include" : "exclude";
                    phenotypeInformationOut.write(individual + "\t" + affectionStatus + "\t" + include + "\t" + sex + "\n");
                    vectorInd.add(individual);
                }
            }
            System.out.println("Total number of individuals:\t" + vectorInd.size());
        } finally {
            phenotypeInformationOut.close();
        }
        int numSamples = vectorInd.size();

        System.out.println("\nWriting combined individuals to file:");
        TextFile outInd = new TextFile(outputDir + "Individuals.txt", true);
        try {
            for (int ind = 0; ind < numSamples; ++ind) {
                outInd.write(vectorInd.get(ind) + "\n");
                // Progress indicator: one dot per five individuals.
                if (ind % 5 == 4) {
                    System.out.print(".");
                }
            }
            System.out.println("");
        } finally {
            outInd.close();
        }

        System.out.println("\nWriting unique SNP / probe definition to file:");
        TextFile outSNP = new TextFile(outputDir + "SNPs.txt", true);
        try {
            for (int snp = 0; snp < vectorSNP.size(); ++snp) {
                outSNP.write(vectorSNP.get(snp) + "\n");
                // Progress indicator: one dot per 2000 SNPs.
                if (snp % 2000 == 1999) {
                    System.out.print(".");
                }
            }
            System.out.println("");
        } finally {
            outSNP.close();
        }

        // Lookup table indexed by allele byte; every byte other than A, C, G, T maps to 0.
        byte[] complementAllele = new byte[256];
        complementAllele['T'] = 'A';
        complementAllele['A'] = 'T';
        complementAllele['C'] = 'G';
        complementAllele['G'] = 'C';

        System.out.println("\nCombining genotype and raw data from both analyses:");
        WGAFileMatrixGenotype fileMatrixGenotype = new WGAFileMatrixGenotype(vectorSNP.size(), numSamples, new File(outputDir + "GenotypeMatrix.dat"), false);
        try {
            SNPLoader loader1 = genotypeDataset1.createSNPLoader();
            SNPLoader loader2 = genotypeDataset2.createSNPLoader();
            int numInds1 = genotypeDataset1.getIndividuals().length;
            int numInds2 = genotypeDataset2.getIndividuals().length;
            for (int x = 0; x < vectorSNP.size(); ++x) {
                if (x % 10000 == 0) {
                    System.out.println("Number of unique SNPs parsed so far:\t" + x);
                }
                String rsName = vectorSNP.get(x);
                int snpID1 = genotypeDataset1.getSnpToSNPId().get(rsName);
                int snpID2 = genotypeDataset2.getSnpToSNPId().get(rsName);
                SNP snpDataObject1 = genotypeDataset1.getSNPObject(snpID1);
                loader1.loadGenotypes(snpDataObject1);
                SNP snpDataObject2 = genotypeDataset2.getSNPObject(snpID2);
                loader2.loadGenotypes(snpDataObject2);
                byte[] ds1Allele1 = snpDataObject1.getAllele1();
                byte[] ds1Allele2 = snpDataObject1.getAllele2();
                byte[] ds2Allele1 = snpDataObject2.getAllele1();
                byte[] ds2Allele2 = snpDataObject2.getAllele2();

                // alleles[] collects the distinct allele bytes in order of first appearance;
                // alleleCount[d][code] counts how often allele 'code' occurs in dataset d.
                byte[] alleles = new byte[4];
                int alleleItr = 0;
                int[][] alleleCount = new int[2][3];

                // Pass 1: register and count the alleles of dataset 1.
                // NOTE(review): only slots 0 and 1 are searched here, so a third allele that
                // occurs more than once in dataset 1 is assigned code 3 and the count update
                // below throws ArrayIndexOutOfBoundsException. Left as is; confirm whether
                // such SNPs should be excluded instead.
                for (int individualID = 0; individualID < numInds1; ++individualID) {
                    byte allele1Byte = ds1Allele1[individualID];
                    byte allele2Byte = ds1Allele2[individualID];
                    // 0 and '0' are both skipped as uncalled genotypes.
                    if (allele1Byte == 0 || allele2Byte == 0 || allele1Byte == '0' || allele2Byte == '0') {
                        continue;
                    }
                    int allelecode1 = -1;
                    int allelecode2 = -1;
                    for (int i = 0; i < 2; ++i) {
                        if (alleles[i] == allele1Byte) {
                            allelecode1 = i;
                        }
                    }
                    if (allelecode1 == -1) {
                        alleles[alleleItr] = allele1Byte;
                        allelecode1 = alleleItr++;
                    }
                    for (int i = 0; i < 2; ++i) {
                        if (alleles[i] == allele2Byte) {
                            allelecode2 = i;
                        }
                    }
                    if (allelecode2 == -1) {
                        alleles[alleleItr] = allele2Byte;
                        allelecode2 = alleleItr++;
                    }
                    alleleCount[0][allelecode2]++;
                    alleleCount[0][allelecode1]++;
                }

                // Pass 2: probe dataset 2 as-is. If its alleles push the total above two,
                // dataset 2 is complemented in pass 3 and in the output.
                boolean allelesDifferent = false;
                int oldAlleleItr = alleleItr;
                for (int individualID = 0; individualID < numInds2; ++individualID) {
                    byte allele1Byte = ds2Allele1[individualID];
                    byte allele2Byte = ds2Allele2[individualID];
                    if (allele1Byte == 0 || allele2Byte == 0 || allele1Byte == '0' || allele2Byte == '0') {
                        continue;
                    }
                    boolean known1 = false;
                    for (int i = 0; i < alleleItr; ++i) {
                        if (alleles[i] == allele1Byte) {
                            known1 = true;
                        }
                    }
                    if (!known1) {
                        alleles[alleleItr] = allele1Byte;
                        ++alleleItr;
                        if (alleleItr > 2) {
                            allelesDifferent = true;
                        }
                    }
                    boolean known2 = false;
                    for (int i = 0; i < alleleItr; ++i) {
                        if (alleles[i] == allele2Byte) {
                            known2 = true;
                        }
                    }
                    if (!known2) {
                        alleles[alleleItr] = allele2Byte;
                        ++alleleItr;
                        if (alleleItr > 2) {
                            allelesDifferent = true;
                        }
                    }
                }

                // Pass 3: rewind the counter to its state after dataset 1 (the entries that
                // pass 2 stored in alleles[] are deliberately left in place) and count
                // dataset 2, complemented when pass 2 asked for it.
                alleleItr = oldAlleleItr;
                boolean alleleDifferenceError = false;
                for (int individualID = 0; individualID < numInds2; ++individualID) {
                    byte allele1Byte = ds2Allele1[individualID];
                    byte allele2Byte = ds2Allele2[individualID];
                    if (allele1Byte == 0 || allele2Byte == 0 || allele1Byte == '0' || allele2Byte == '0') {
                        continue;
                    }
                    if (allelesDifferent) {
                        allele1Byte = complementAllele[allele1Byte];
                        allele2Byte = complementAllele[allele2Byte];
                    }
                    int allelecode1 = -1;
                    int allelecode2 = -1;
                    for (int i = 0; i < 2; ++i) {
                        if (alleles[i] == allele1Byte) {
                            allelecode1 = i;
                        }
                    }
                    if (allelecode1 == -1) {
                        alleles[alleleItr] = allele1Byte;
                        allelecode1 = alleleItr++;
                        if (alleleItr > 2) {
                            alleleDifferenceError = true;
                            break;
                        }
                    }
                    for (int i = 0; i < 2; ++i) {
                        if (alleles[i] == allele2Byte) {
                            allelecode2 = i;
                        }
                    }
                    if (allelecode2 == -1) {
                        alleles[alleleItr] = allele2Byte;
                        allelecode2 = alleleItr++;
                        if (alleleItr > 2) {
                            alleleDifferenceError = true;
                            break;
                        }
                    }
                    alleleCount[1][allelecode2]++;
                    alleleCount[1][allelecode1]++;
                }

                if (alleleDifferenceError) {
                    System.out.println("\nError! SNP\t" + rsName + "\thas more than two different alleles in the two datasets, excluding this SNP!!!");
                    System.out.println("Please ensure that you used the same genotype allele naming convention within BeadStudio.");
                    System.out.println("It is highly recommended to only use datasets that have been generated based on the Illumina 'TOP Allele' naming convention.");
                    // Nothing is written for this SNP; its row in the matrix is left untouched.
                    continue;
                }

                // Frequency of the first registered allele per dataset (NaN when a dataset
                // has no called genotype for this SNP).
                double maf1 = (double)alleleCount[0][0] / (double)(alleleCount[0][0] + alleleCount[0][1]);
                double maf2 = (double)alleleCount[1][0] / (double)(alleleCount[1][0] + alleleCount[1][1]);
                if (alleleCount[0][0] > alleleCount[0][1] && alleleCount[1][0] < alleleCount[1][1] && (maf1 / maf2 > 1.5 || maf2 / maf1 > 1.5)) {
                    System.out.println("Warning: " + rsName + " has quite different allele frequency between dataset 1 (Allele freq. = " + maf1 + ") and dataset 2 (Same allele freq. = " + maf2 + ")");
                }

                // Concatenate the genotypes: dataset 1 first, then dataset 2.
                byte[] allele1 = new byte[numSamples];
                byte[] allele2 = new byte[numSamples];
                System.arraycopy(ds1Allele1, 0, allele1, 0, numInds1);
                System.arraycopy(ds1Allele2, 0, allele2, 0, numInds1);
                for (int y = numInds1; y < numSamples; ++y) {
                    byte second1 = ds2Allele1[y - numInds1];
                    byte second2 = ds2Allele2[y - numInds1];
                    if (allelesDifferent) {
                        second1 = complementAllele[second1];
                        second2 = complementAllele[second2];
                    }
                    allele1[y] = second1;
                    allele2[y] = second2;
                }
                fileMatrixGenotype.setAllele1(x, 0, allele1);
                fileMatrixGenotype.setAllele2(x, 0, allele2);
            }
        } finally {
            fileMatrixGenotype.close();
        }
        System.out.println("Number of unique SNPs parsed in total:\t" + vectorSNP.size());
        System.out.println("\nCombining of TriTyper datasets has finished. Please ensure the errors and warnings that might have been observed are acceptable.");
        System.out.println("\nPlease ensure you copy a SNPMappings.txt file to the outputfolder holding the combined dataset.");
        System.out.println("After copying this file, you should be able to run TriTyper on these two combined datasets.");
    }

    public void mergeDatasetsOnCommonSamples(String[] datasetLocations, String outputDir) throws IOException {
        int d;
        int d2;
        if (datasetLocations.length < 2) {
            throw new IllegalArgumentException("Error: Nothing to combine, only one dataset presented to the program.");
        }
        if (outputDir == null) {
            throw new IllegalArgumentException("Error: No outputdir selected.");
        }
        if (!outputDir.endsWith("/")) {
            outputDir = outputDir + "/";
        }
        Gpio.createDir(outputDir);
        TextFile log = new TextFile(outputDir + "log.txt", true);
        TriTyperGenotypeData[] ds = new TriTyperGenotypeData[datasetLocations.length];
        for (int d3 = 0; d3 < ds.length; ++d3) {
            log.writeln("Loading\t: " + datasetLocations[d3]);
            System.out.println("Loading\t: " + datasetLocations[d3]);
            ds[d3] = new TriTyperGenotypeData();
            ds[d3].load(datasetLocations[d3]);
            log.writeln("SNPs\t: " + ds[d3].getSNPs().length);
            log.writeln("Samples\t: " + ds[d3].getIndividuals().length);
            log.writeln();
        }
        HashSet<String> duplicateIndividualsOverAllDatasets = new HashSet<String>();
        for (int d4 = 0; d4 < ds.length; ++d4) {
            HashSet<String> visitedIndividuals = new HashSet<String>();
            String[] individualsInDatasetD = ds[d4].getIndividuals();
            for (int indId = 0; indId < individualsInDatasetD.length; ++indId) {
                String s = individualsInDatasetD[indId];
                if (visitedIndividuals.contains(s)) {
                    duplicateIndividualsOverAllDatasets.add(s);
                    log.writeln("Dataset\t" + d4 + "\tcontains duplicate sample which will be excluded:\t" + s);
                    System.out.println("Dataset\t" + d4 + "\tcontains duplicate sample which will be excluded:\t" + s);
                }
                visitedIndividuals.add(s);
            }
        }
        HashMap<String, Object> individualCounterAcrossDatasets = new HashMap<String, Object>();
        HashSet<String> uniqueIndividualsInAllDatasets = new HashSet<String>();
        for (int d5 = 0; d5 < ds.length; ++d5) {
            String[] individualsInDatasetD = ds[d5].getIndividuals();
            for (int indId = 0; indId < individualsInDatasetD.length; ++indId) {
                String ind = individualsInDatasetD[indId];
                if (!ds[d5].getIsIncluded()[indId].booleanValue() || duplicateIndividualsOverAllDatasets.contains(ind)) continue;
                Object ctr = (Integer)individualCounterAcrossDatasets.get(ind);
                if (ctr == null) {
                    ctr = 0;
                }
                Object object = ctr;
                ctr = (Integer)ctr + 1;
                Integer n = ctr;
                individualCounterAcrossDatasets.put(ind, ctr);
                uniqueIndividualsInAllDatasets.add(ind);
            }
        }
        duplicateIndividualsOverAllDatasets = null;
        String[] availableIndividualsForAllDatasets = uniqueIndividualsInAllDatasets.toArray(new String[0]);
        HashMap<String, Integer> includedIndividualsToId = new HashMap<String, Integer>();
        ArrayList<String> includedIndividuals = new ArrayList<String>();
        int indId = 0;
        for (String s : availableIndividualsForAllDatasets) {
            Integer ctr = (Integer)individualCounterAcrossDatasets.get(s);
            if (ctr != null && ctr > 1 && ctr < ds.length + 1) {
                includedIndividualsToId.put(s, indId);
                includedIndividuals.add(s);
                ++indId;
                continue;
            }
            log.writeln("Individual\t" + s + "\tis excluded because it is either a duplicate or because it is present " + ctr + " times, while expected num == " + ds.length);
            System.out.println("Individual\t" + s + "\tis excluded because it is either a duplicate or because it is present " + ctr + " times, while expected num == " + ds.length);
        }
        availableIndividualsForAllDatasets = null;
        individualCounterAcrossDatasets = null;
        uniqueIndividualsInAllDatasets = null;
        log.writeln("Number of samples that are shared by all datasets:\t" + includedIndividuals.size());
        System.out.println("Number of samples that are shared by all datasets:\t" + includedIndividuals.size());
        if (includedIndividuals.isEmpty()) {
            log.writeln("Nothing to merge, since no included dataset shares at least 1 individual");
            System.out.println("Nothing to merge, since no included dataset shares at least 1 individual");
            log.close();
            System.exit(0);
        }
        HashSet<String> visitedSNPsAcrossDatasets = new HashSet<String>();
        HashSet<String> snpsDuplicateAcrossDatasets = new HashSet<String>();
        TextFile duplicateOut = new TextFile(outputDir + "DuplicateSNPWithinDatasets.txt", true);
        duplicateOut.writeln("dataset\tsnp");
        String[][] snpsDuplicatePerDataset = new String[ds.length][0];
        for (d2 = 0; d2 < ds.length; ++d2) {
            String[] snpsInDataset = ds[d2].getSNPs();
            HashSet<String> snpsVisitedInThisDataset = new HashSet<String>();
            HashSet<String> duplicateSNPsInThisDataset = new HashSet<String>();
            for (String s : snpsInDataset) {
                if (!visitedSNPsAcrossDatasets.contains(s)) {
                    visitedSNPsAcrossDatasets.add(s);
                } else {
                    snpsDuplicateAcrossDatasets.add(s);
                }
                if (!snpsVisitedInThisDataset.contains(s)) {
                    snpsVisitedInThisDataset.add(s);
                    continue;
                }
                duplicateOut.writeln(d2 + "\t" + (String)s);
                duplicateSNPsInThisDataset.add(s);
            }
            snpsDuplicatePerDataset[d2] = duplicateSNPsInThisDataset.toArray(new String[0]);
            System.out.println("Dataset\t" + d2 + "\thas\t" + duplicateSNPsInThisDataset.size() + "\tduplicate SNPs");
        }
        duplicateOut.close();
        for (d2 = 0; d2 < ds.length; ++d2) {
            if (snpsDuplicatePerDataset[d2].length <= 0) continue;
            log.writeln("Dataset " + d2 + " has duplicate SNPs. Please remove them before you continue");
            System.out.println("Dataset " + d2 + " has duplicate SNPs. Please remove them before you continue");
            log.close();
            System.exit(-1);
        }
        log.writeln("Duplicate SNPs accross datasets: " + snpsDuplicateAcrossDatasets.size());
        System.out.println("Duplicate SNPs accross datasets: " + snpsDuplicateAcrossDatasets.size());
        duplicateOut = new TextFile(outputDir + "DuplicateSNPAccrossDatasets.txt", true);
        String header = "SNP";
        for (int d6 = 0; d6 < ds.length; ++d6) {
            header = header + "\t" + d6 + " Alleles\t" + d6 + " MinorAllele\t" + d6 + " MAF\t" + d6 + " HWEP\t" + d6 + " CR";
        }
        duplicateOut.writeln(header);
        SNPLoader[] loaders = new SNPLoader[ds.length];
        for (int d7 = 0; d7 < ds.length; ++d7) {
            loaders[d7] = ds[d7].createSNPLoader();
        }
        TextFile genotypeComp = new TextFile(outputDir + "GenotypeComparisonOfDuplicateSNPs.txt.gz", true);
        genotypeComp.writeln("SNP\td1\tCR\tMAF\tHWEP\tAlleles\tMinorAllele\tflipallele1\td2\tCR\tMAF\tHWEP\tAlleles\tMinorAllele\tflipallele2\tcalled\tdifferent\tidentical");
        int[][] indLookup = new int[ds.length][includedIndividualsToId.size()];
        for (int d8 = 0; d8 < ds.length; ++d8) {
            String[] inds = ds[d8].getIndividuals();
            indLookup[d8] = new int[inds.length];
            for (int i = 0; i < includedIndividuals.size(); ++i) {
                indLookup[d8][i] = ds[d8].getIndividualToId().get(includedIndividuals.get(i));
            }
        }
        int nrWithInCompatibleAlleles = 0;
        HashMap<String, Integer> duplicateSNPsSelectFromThisDataset = new HashMap<String, Integer>();
        HashSet<String> duplicateSNPsThatShouldBeIncluded = new HashSet<String>();
        for (String s : snpsDuplicateAcrossDatasets) {
            SNP[] snps = new SNP[ds.length];
            StringBuilder output = new StringBuilder();
            for (int d9 = 0; d9 < ds.length; ++d9) {
                Integer snpId = ds[d9].getSnpToSNPId().get((Object)s);
                if (snpId == -9) continue;
                snps[d9] = ds[d9].getSNPObject(snpId);
                loaders[d9].loadGenotypes(snps[d9]);
                output.append("\t").append(BaseAnnot.toString(snps[d9].getAlleles()[0])).append("/").append(BaseAnnot.toString(snps[d9].getAlleles()[1])).append("\t").append(BaseAnnot.toString(snps[d9].getMinorAllele())).append("\t").append(snps[d9].getMAF()).append("\t").append(snps[d9].getHWEP()).append("\t").append(snps[d9].getCR());
            }
            duplicateOut.writeln(s + output.toString());
            Boolean[] flipAlleles = CompareAllelicDirections.compare(snps);
            if (flipAlleles == null) {
                String out = s;
                for (int d10 = 0; d10 < ds.length; ++d10) {
                    if (snps[d10] != null) {
                        SNP snpObj = snps[d10];
                        out = out + "\t" + d10 + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "/" + BaseAnnot.toString(snpObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(snpObj.getMinorAllele());
                        continue;
                    }
                    out = out + "\tNotPresentIn" + d10;
                }
                ++nrWithInCompatibleAlleles;
                genotypeComp.writeln(out);
            } else {
                short[] genotypes1 = new short[includedIndividualsToId.size()];
                short[] genotypes2 = new short[includedIndividualsToId.size()];
                Integer[] nrDifferentPerDataset = new Integer[ds.length];
                for (int d1 = 0; d1 < ds.length; ++d1) {
                    SNP snp1 = snps[d1];
                    if (snp1 == null) continue;
                    byte[] gt1 = snp1.getGenotypes();
                    for (int i = 0; i < includedIndividuals.size(); ++i) {
                        int gt = gt1[indLookup[d1][i]];
                        if (flipAlleles[d1].booleanValue()) {
                            if (gt == 0) {
                                gt = 2;
                            } else if (gt == 2) {
                                gt = 0;
                            }
                        }
                        genotypes1[i] = gt;
                    }
                    for (int d22 = d1 + 1; d22 < ds.length; ++d22) {
                        SNP snp2 = snps[d22];
                        if (snp2 == null) continue;
                        byte[] gt2 = snp2.getGenotypes();
                        for (int i = 0; i < includedIndividuals.size(); ++i) {
                            int gt = gt2[indLookup[d22][i]];
                            if (flipAlleles[d22].booleanValue()) {
                                if (gt == 0) {
                                    gt = 2;
                                } else if (gt == 2) {
                                    gt = 0;
                                }
                            }
                            genotypes2[i] = gt;
                        }
                        int identical = 0;
                        int different = 0;
                        int called = 0;
                        for (int g = 0; g < genotypes1.length; ++g) {
                            short g1 = genotypes1[g];
                            short g2 = genotypes2[g];
                            if (g1 < 0 || g2 < 0) continue;
                            ++called;
                            if (g1 == g2) {
                                ++identical;
                                continue;
                            }
                            ++different;
                        }
                        StringBuilder snpStats1 = new StringBuilder();
                        snpStats1.append(snp1.getCR()).append("\t").append(snp1.getMAF()).append("\t").append(snp1.getHWEP()).append("\t").append(BaseAnnot.toString(snp1.getAlleles()[0])).append("/").append(BaseAnnot.toString(snp1.getAlleles()[1])).append("\t").append(BaseAnnot.toString(snp1.getMinorAllele()));
                        StringBuilder snpStats2 = new StringBuilder();
                        snpStats2.append(snp2.getCR()).append("\t").append(snp2.getMAF()).append("\t").append(snp2.getHWEP()).append("\t").append(BaseAnnot.toString(snp2.getAlleles()[0])).append("/").append(BaseAnnot.toString(snp2.getAlleles()[1])).append("\t").append(BaseAnnot.toString(snp2.getMinorAllele()));
                        genotypeComp.writeln(s + "\t" + d1 + "\t" + snpStats1.toString() + "\t" + flipAlleles[d1] + "\t" + d22 + "\t" + snpStats2.toString() + "\t" + flipAlleles[d22] + "\t" + called + "\t" + different + "\t" + identical);
                        nrDifferentPerDataset[d1] = different;
                    }
                }
                int compsMade = 0;
                double diffSum = 0.0;
                for (int d11 = 0; d11 < ds.length; ++d11) {
                    if (nrDifferentPerDataset[d11] == null) continue;
                    diffSum += (double)nrDifferentPerDataset[d11].intValue();
                    ++compsMade;
                }
                double ratio = diffSum / (double)compsMade;
                if (!(ratio >= 1.0)) {
                    int maxCRDs = -1;
                    double maxCR = 0.0;
                    for (int d12 = 0; d12 < ds.length; ++d12) {
                        SNP snpObj = snps[d12];
                        if (!(snpObj.getCR() > maxCR)) continue;
                        maxCR = snpObj.getCR();
                        maxCRDs = d12;
                    }
                    duplicateSNPsThatShouldBeIncluded.add(s);
                    duplicateSNPsSelectFromThisDataset.put(s, maxCRDs);
                }
            }
            for (int d13 = 0; d13 < ds.length; ++d13) {
                snps[d13].clearGenotypes();
            }
        }
        System.out.println("Nr of SNPs with incompatible alleles: " + nrWithInCompatibleAlleles);
        genotypeComp.close();
        duplicateOut.close();
        HashSet<String> duplicateSNPs = new HashSet<String>();
        duplicateSNPs.addAll(Arrays.asList(snpsDuplicatePerDataset[0]));
        duplicateSNPs.addAll(Arrays.asList(snpsDuplicatePerDataset[1]));
        duplicateSNPs.addAll(snpsDuplicateAcrossDatasets);
        snpsDuplicatePerDataset = null;
        visitedSNPsAcrossDatasets = null;
        log.writeln("Total number of duplicate SNPs: " + duplicateSNPs.size());
        System.out.println("Total number of duplicate SNPs: " + duplicateSNPs.size());
        String[] listOfDuplicateSNPs = duplicateSNPs.toArray(new String[0]);
        log.writeList(Arrays.asList(listOfDuplicateSNPs));
        ArrayList<String> uniqueSNPs = new ArrayList<String>();
        boolean[] datasethasUniqueSNPs = new boolean[ds.length];
        for (int d14 = 0; d14 < ds.length; ++d14) {
            String[] snpsInDataset;
            for (String s : snpsInDataset = ds[d14].getSNPs()) {
                if (duplicateSNPs.contains(s)) continue;
                uniqueSNPs.add(s);
                datasethasUniqueSNPs[d14] = true;
            }
        }
        TextFile snpsout = new TextFile(outputDir + "UniqueSNPsAccrossDatasets.txt", true);
        String[] listOfSNPs = uniqueSNPs.toArray(new String[0]);
        snpsout.writeList(Arrays.asList(listOfSNPs));
        snpsout.close();
        log.writeln("Unique SNPs: " + uniqueSNPs.size() + "\tselected duplicate SNPs: " + duplicateSNPsSelectFromThisDataset.size() + "\ttotal: " + (uniqueSNPs.size() + duplicateSNPsSelectFromThisDataset.size()));
        System.out.println("Unique SNPs: " + uniqueSNPs.size() + "\tselected duplicate SNPs: " + duplicateSNPsSelectFromThisDataset.size() + "\ttotal: " + (uniqueSNPs.size() + duplicateSNPsSelectFromThisDataset.size()));
        if (uniqueSNPs.isEmpty()) {
            log.writeln("No unique SNPs detected. Nothing to merge");
            System.out.println("No unique SNPs detected. Nothing to merge");
            log.close();
            System.exit(0);
        }
        TextFile outInds = new TextFile(outputDir + "Individuals.txt", true);
        TextFile outPheno = new TextFile(outputDir + "PhenotypeInformation.txt", true);
        block26: for (String ind : includedIndividuals) {
            outInds.writeln(ind);
            for (int d15 = 0; d15 < ds.length; ++d15) {
                Integer id = ds[d15].getIndividualId(ind);
                if (id == null) continue;
                String gender = "male";
                if (ds[d15].getIsFemale()[id] == null) {
                    gender = "unknown";
                } else if (ds[d15].getIsFemale()[id].booleanValue()) {
                    gender = "female";
                }
                String status = "control";
                if (ds[d15].getIsCase()[id] == null) {
                    status = "unknown";
                } else if (ds[d15].getIsCase()[id].booleanValue()) {
                    status = "case";
                }
                outPheno.writeln(ind + "\t" + status + "\tinclude\t" + gender);
                continue block26;
            }
        }
        outInds.close();
        outPheno.close();
        log.writeln();
        log.writeln("Final size of matrix will be: " + includedIndividuals.size() + " x " + (uniqueSNPs.size() + duplicateSNPsSelectFromThisDataset.size()));
        System.out.println("Final size of matrix will be: " + includedIndividuals.size() + " x " + (uniqueSNPs.size() + duplicateSNPsSelectFromThisDataset.size()));
        uniqueSNPs.addAll(duplicateSNPsThatShouldBeIncluded);
        File fileGenotypeMatrix = new File(outputDir + "GenotypeMatrix.dat");
        if (fileGenotypeMatrix.exists()) {
            fileGenotypeMatrix.delete();
            fileGenotypeMatrix = new File(outputDir + "GenotypeMatrix.dat");
        }
        WGAFileMatrixGenotype newDS = new WGAFileMatrixGenotype(uniqueSNPs.size(), includedIndividuals.size(), fileGenotypeMatrix, false);
        TextFile snpout = new TextFile(outputDir + "SNPs.txt", true);
        TextFile snpMappingsOut = new TextFile(outputDir + "SNPMappings.txt", true);
        int nrSnps = uniqueSNPs.size();
        int nrSamplesIncluded = includedIndividuals.size();
        HashSet<String> snpsProcessed = new HashSet<String>();
        ProgressBar pb = new ProgressBar(nrSnps, "Parsing SNPs");
        int[] dataFromSet = new int[ds.length];
        for (d = 0; d < ds.length; ++d) {
            dataFromSet[d] = 0;
            TriTyperGenotypeData data = ds[d];
            if (!datasethasUniqueSNPs[d]) continue;
            String[] inds = data.getIndividuals();
            int nrSamplesInDataset = inds.length;
            Integer[] sampleToNewSampleId = new Integer[nrSamplesInDataset];
            for (int i = 0; i < inds.length; ++i) {
                sampleToNewSampleId[i] = (Integer)includedIndividualsToId.get(inds[i]);
            }
            System.out.println("Now processing dataset " + d + " (" + datasetLocations[d] + ")");
            SNPLoader loader = loaders[d];
            for (int s = 0; s < nrSnps; ++s) {
                String snpname = (String)uniqueSNPs.get(s);
                Integer snpId = null;
                if (duplicateSNPsThatShouldBeIncluded.contains(snpname)) {
                    Integer dsId = (Integer)duplicateSNPsSelectFromThisDataset.get(snpname);
                    if (dsId != null && dsId.equals(d)) {
                        snpId = data.getSnpToSNPId().get((Object)snpname);
                    }
                } else {
                    snpId = data.getSnpToSNPId().get((Object)snpname);
                }
                if (snpId == null) continue;
                int n = d;
                dataFromSet[n] = dataFromSet[n] + 1;
                snpsProcessed.add(snpname);
                SNP snpObj = data.getSNPObject(snpId);
                snpout.writeln(snpname);
                snpMappingsOut.writeln(ChrAnnotation.parseByte(snpObj.getChr()) + "\t" + snpObj.getChrPos() + "\t" + snpname);
                loader.loadGenotypes(snpObj);
                byte[] allele1 = snpObj.getAllele1();
                byte[] allele2 = snpObj.getAllele2();
                byte[] outputallele1 = new byte[nrSamplesIncluded];
                byte[] outputallele2 = new byte[nrSamplesIncluded];
                for (int i = 0; i < nrSamplesInDataset; ++i) {
                    Integer newId = sampleToNewSampleId[i];
                    if (newId == null) continue;
                    outputallele1[newId.intValue()] = allele1[i];
                    outputallele2[newId.intValue()] = allele2[i];
                }
                newDS.setAlleles(s, outputallele1, outputallele2);
                snpObj.clearGenotypes();
                pb.iterate();
            }
            loader.close();
        }
        pb.close();
        for (d = 0; d < ds.length; ++d) {
            System.out.println("Percentage of SNPs form dataset " + d + ": " + (double)dataFromSet[d] / (double)uniqueSNPs.size() * 100.0);
        }
        if (snpsProcessed.size() != uniqueSNPs.size()) {
            log.writeln("ERROR: nr of processed SNPs unequal to nr of unique SNPs. Found: " + snpsProcessed.size() + "\tExpected: " + uniqueSNPs.size());
            System.out.println("ERROR: nr of processed SNPs unequal to nr of unique SNPs. Found: " + snpsProcessed.size() + "\tExpected: " + uniqueSNPs.size());
        } else {
            log.writeln("Everything seems to be ok. Have a nice day.");
            System.out.println("Everything seems to be ok. Have a nice day.");
        }
        snpout.close();
        snpMappingsOut.close();
        newDS.close();
        log.close();
    }

    /**
     * Compares a merged dataset in {@code outputDir} against each of the datasets it was
     * built from and writes one line of concordance statistics per checked SNP to
     * {@code outputDir + "mergecheck.txt"}.
     *
     * <p>Only SNPs listed in {@code outputDir + "UniqueSNPsAccrossDatasets.txt"} are checked.
     * For each such SNP that is present in both the input dataset and the output, the
     * genotypes of the individuals in the output are compared after bringing both SNPs to
     * the allelic direction reported by {@link CompareAllelicDirections#compare}. Genotypes
     * below zero are treated as not called and are left out of the counts.
     *
     * <p>Each regular log line holds, tab separated: the SNP index in the output, the input
     * dataset location, the input SNP statistics, the input flip flag, the literal
     * {@code output}, the output SNP statistics, the output flip flag and the number of
     * called, different and identical genotypes. SNPs whose alleles cannot be reconciled
     * ({@code compare} returns {@code null}) are logged with the alleles seen on both sides.
     *
     * @param datasetLocations directories of the input datasets to check the merge against
     * @param outputDir directory of the merged dataset; used as a plain string prefix, so it
     *     is expected to end with a path separator
     * @throws IOException if reading a dataset or writing the log fails
     */
    public void checkMerge(String[] datasetLocations, String outputDir) throws IOException {
        System.out.println("Checking MERGE");
        HashSet<String> uniquesnps = new HashSet<String>();
        TextFile snpsIn = new TextFile(outputDir + "UniqueSNPsAccrossDatasets.txt", false);
        try {
            uniquesnps.addAll(snpsIn.readAsArrayList());
        } finally {
            snpsIn.close();
        }

        // Marker for "no genotype available"; the comparison loop skips every value below zero.
        final short missingGenotype = -1;

        TextFile log = new TextFile(outputDir + "mergecheck.txt", true);
        SNPLoader outputLoader = null;
        try {
            log.writeln("Checking: " + outputDir);
            System.out.println("Checking: " + outputDir);
            TriTyperGenotypeData output = new TriTyperGenotypeData();
            output.load(outputDir);
            outputLoader = output.createSNPLoader();
            String[] indsInOutput = output.getIndividuals();

            for (int d = 0; d < datasetLocations.length; ++d) {
                log.writeln("Now parsing: " + datasetLocations[d]);
                System.out.println("Now parsing: " + datasetLocations[d]);
                TriTyperGenotypeData input = new TriTyperGenotypeData();
                input.load(datasetLocations[d]);

                // Indexed by individual position in the output; holds that individual's id in
                // the input dataset, or null when the individual is not part of this input.
                Integer[] fromInputToOutput = new Integer[indsInOutput.length];
                for (int ind = 0; ind < indsInOutput.length; ++ind) {
                    fromInputToOutput[ind] = input.getIndividualId(indsInOutput[ind]);
                }

                SNPLoader inputLoader = input.createSNPLoader();
                try {
                    String[] snpsInInput = input.getSNPs();
                    for (int s = 0; s < snpsInInput.length; ++s) {
                        String snpName = snpsInInput[s];
                        if (!uniquesnps.contains(snpName)) {
                            continue;
                        }
                        // NOTE(review): -9 is kept from the original check and presumably marks
                        // "SNP not present"; null is handled as well in case the lookup is an
                        // ordinary map that returns null for unknown keys.
                        Integer snpIdInOutput = output.getSnpToSNPId().get(snpName);
                        if (snpIdInOutput == null || snpIdInOutput == -9) {
                            continue;
                        }
                        SNP snpObjInOutput = output.getSNPObject(snpIdInOutput);
                        SNP snpObjInInput = input.getSNPObject(s);
                        if (snpObjInInput == null || snpObjInOutput == null) {
                            System.out.println("WARNING: snp present in input but not in output:\t" + snpIdInOutput + "\t" + s + "\t" + snpIdInOutput);
                            continue;
                        }
                        SNP[] snpObjs = new SNP[]{snpObjInInput, snpObjInOutput};
                        inputLoader.loadGenotypes(snpObjInInput);
                        outputLoader.loadGenotypes(snpObjInOutput);

                        Boolean[] flipAlleles = CompareAllelicDirections.compare(snpObjs);
                        if (flipAlleles == null) {
                            // Alleles could not be reconciled: report what each side carries.
                            String outStr = snpName;
                            for (int ds = 0; ds < snpObjs.length; ++ds) {
                                String dsName = ds == 1 ? "output" : datasetLocations[d];
                                SNP snpObj = snpObjs[ds];
                                outStr = outStr + "\t" + dsName + "\t" + BaseAnnot.toString(snpObj.getAlleles()[0]) + "/" + BaseAnnot.toString(snpObj.getAlleles()[1]) + "\t" + BaseAnnot.toString(snpObj.getMinorAllele());
                            }
                            log.writeln(outStr);
                        } else {
                            int nrInds = fromInputToOutput.length;
                            short[] genotypes1 = new short[nrInds];
                            short[] genotypes2 = new short[nrInds];

                            // Input genotypes, reordered to the individual order of the output.
                            SNP snp1 = snpObjInInput;
                            byte[] gt1 = snp1.getGenotypes();
                            boolean flip1 = flipAlleles[0].booleanValue();
                            for (int i = 0; i < nrInds; ++i) {
                                Integer inputId = fromInputToOutput[i];
                                if (inputId == null) {
                                    // Individual is absent from this input dataset: nothing to compare.
                                    genotypes1[i] = missingGenotype;
                                } else {
                                    genotypes1[i] = swapHomozygotesIfFlipped(gt1[inputId], flip1);
                                }
                            }

                            // Output genotypes are already in output order.
                            SNP snp2 = snpObjInOutput;
                            byte[] gt2 = snp2.getGenotypes();
                            boolean flip2 = flipAlleles[1].booleanValue();
                            for (int i = 0; i < nrInds; ++i) {
                                genotypes2[i] = swapHomozygotesIfFlipped(gt2[i], flip2);
                            }

                            int identical = 0;
                            int different = 0;
                            int called = 0;
                            for (int g = 0; g < nrInds; ++g) {
                                short g1 = genotypes1[g];
                                short g2 = genotypes2[g];
                                if (g1 < 0 || g2 < 0) {
                                    continue;
                                }
                                ++called;
                                if (g1 == g2) {
                                    ++identical;
                                } else {
                                    ++different;
                                }
                            }

                            String snpStats1 = snp1.getName() + "\t" + snp1.getCR() + "\t" + snp1.getMAF() + "\t" + snp1.getHWEP() + "\t" + BaseAnnot.toString(snp1.getAlleles()[0]) + "/" + BaseAnnot.toString(snp1.getAlleles()[1]) + "\t" + BaseAnnot.toString(snp1.getMinorAllele());
                            String snpStats2 = snp2.getName() + "\t" + snp2.getCR() + "\t" + snp2.getMAF() + "\t" + snp2.getHWEP() + "\t" + BaseAnnot.toString(snp2.getAlleles()[0]) + "/" + BaseAnnot.toString(snp2.getAlleles()[1]) + "\t" + BaseAnnot.toString(snp2.getMinorAllele());
                            log.writeln(snpIdInOutput + "\t" + datasetLocations[d] + "\t" + snpStats1 + "\t" + flipAlleles[0] + "\toutput\t" + snpStats2 + "\t" + flipAlleles[1] + "\t" + called + "\t" + different + "\t" + identical);
                        }

                        // Release the genotype arrays before moving on to the next SNP.
                        for (int q = 0; q < snpObjs.length; ++q) {
                            snpObjs[q].clearGenotypes();
                        }
                    }
                } finally {
                    inputLoader.close();
                }
            }
        } finally {
            // Close the loader first, but make sure the log is closed even if that fails.
            try {
                if (outputLoader != null) {
                    outputLoader.close();
                }
            } finally {
                log.close();
            }
        }
    }

    /**
     * Returns the genotype as a short, exchanging the two homozygous codes (0 and 2) when
     * {@code flip} is set. All other values are passed through unchanged.
     */
    private static short swapHomozygotesIfFlipped(byte genotype, boolean flip) {
        if (flip) {
            if (genotype == 0) {
                return 2;
            }
            if (genotype == 2) {
                return 0;
            }
        }
        return genotype;
    }
}

