/*
 * Decompiled with CFR 0.152.
 */
package nl.umcg.deelenp.genotypeharmonizer;

import JSci.maths.ArrayMath;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.TreeMap;
import nl.umcg.deelenp.genotypeharmonizer.GenotypeHarmonizer;
import org.apache.log4j.Logger;
import org.molgenis.genotype.RandomAccessGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGeneticVariant;
import org.molgenis.genotype.modifiable.ModifiableGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGenotypeDataInMemory;
import org.molgenis.genotype.util.Ld;
import org.molgenis.genotype.util.LdCalculator;
import org.molgenis.genotype.util.LdCalculatorException;
import org.molgenis.genotype.variant.GeneticVariant;

/**
 * Aligns the strand of study variants to a reference data set.
 *
 * <p>The work is done in three passes over the study variants:
 * <ol>
 *   <li>Match every study variant to a reference variant at the same position and swap the
 *       strand of non A/T and non G/C SNPs whose alleles differ from the reference.</li>
 *   <li>Optionally verify those SNPs by comparing haplotype frequencies with flanking SNPs.</li>
 *   <li>Decide the strand of A/T and G/C SNPs from the haplotype-frequency comparison with
 *       flanking non A/T, non G/C SNPs (or, in some cases, from the minor allele).</li>
 * </ol>
 */
public class Aligner {
    // The logger is deliberately left registered under GenotypeHarmonizer so the log category
    // of the messages emitted here does not change.
    private static final Logger LOGGER = Logger.getLogger(GenotypeHarmonizer.class);

    /**
     * Aligns the study data to the reference data.
     *
     * @param study the study data; it is wrapped in a {@link ModifiableGenotypeDataInMemory}
     *     and the wrapper is what gets modified and returned
     * @param ref the reference data, queried by sequence name and start position
     * @param minLdToIncludeAlign minimum r2 that a pair of variants must reach in both the
     *     study and the reference before the pair is used in the haplotype comparison
     * @param minSnpsToAlignOn minimum number of usable flanking variants required to base a
     *     decision on the haplotype comparison
     * @param flankSnpsToConsider size of the window of neighbouring variants (in the sorted
     *     variant list) that is inspected around each variant
     * @param ldCheck if {@code true}, pass 2 is executed and swaps made in pass 3 are verified
     *     afterwards; the minor-allele fallback of pass 3 is only used when this is
     *     {@code false}
     * @param updateId if {@code true}, study variants whose primary ID differs from the matched
     *     reference variant get the reference ID, and every such change is written to
     *     {@code snpUpdateFile}
     * @param keep if {@code true}, study variants without a matching reference variant are
     *     left untouched (neither aligned nor excluded) instead of being excluded
     * @param snpUpdateFile destination of the ID update log; only used when {@code updateId}
     *     is {@code true}
     * @param maxMafForMafAlignment maximum minor allele frequency (in both study and
     *     reference) for which the minor-allele fallback of pass 3 may be used
     * @return the modifiable view of the study data with swaps, ID updates and exclusions applied
     * @throws LdCalculatorException declared for callers; LD calculation failures raised inside
     *     {@code correlateHaplotypes} are logged and skipped there
     * @throws IOException if the ID update file cannot be written or closed
     */
    public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomAccessGenotypeData ref, double minLdToIncludeAlign, double minSnpsToAlignOn, int flankSnpsToConsider, boolean ldCheck, boolean updateId, boolean keep, File snpUpdateFile, double maxMafForMafAlignment) throws LdCalculatorException, IOException {
        ModifiableGenotypeDataInMemory alignedStudyData = new ModifiableGenotypeDataInMemory(study);
        // Parallel lists: index i of both lists refers to the same matched variant pair.
        ArrayList<ModifiableGeneticVariant> studyVariantList = new ArrayList<ModifiableGeneticVariant>();
        ArrayList<GeneticVariant> refVariantList = new ArrayList<GeneticVariant>();

        int iterationCounter = 0;
        int nonGcNonAtSnpsEncountered = 0;
        int nonGcNonAtSnpsSwapped = 0;

        BufferedWriter snpUpdateWriter = null;
        try {
            if (updateId) {
                snpUpdateWriter = new BufferedWriter(new FileWriter(snpUpdateFile));
                snpUpdateWriter.append("chr\tpos\toriginalId\tnewId\n");
            }

            variants:
            for (ModifiableGeneticVariant studyVariant : alignedStudyData.getModifiableGeneticVariants()) {
                if (++iterationCounter % 10000 == 0) {
                    LOGGER.info("Iteration 1 - " + iterationCounter + " variants processed");
                    System.out.println("Iteration 1 - " + iterationCounter + " variants processed");
                }

                if (!studyVariant.isMapped()) {
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " Has no mapping");
                    studyVariant.exclude();
                    continue;
                }
                if (!studyVariant.isSnp()) {
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " currently only SNPs are supported. Feel free to contact the autors.");
                    studyVariant.exclude();
                    continue;
                }
                if (!studyVariant.isBiallelic()) {
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " only biallelic variants are currently supported.");
                    studyVariant.exclude();
                    continue;
                }

                Iterable<GeneticVariant> potentialRefVariants = ref.getVariantsByPos(studyVariant.getSequenceName(), studyVariant.getStartPos());
                GeneticVariant refVariant = null;

                if (!potentialRefVariants.iterator().hasNext()) {
                    if (keep) {
                        LOGGER.warn("No ref variant found for: " + studyVariant.getPrimaryVariantId() + " variant will not be aligned but will be written to output because of --keep");
                        continue;
                    }
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " no variants at this position " + studyVariant.getSequenceName() + ":" + studyVariant.getStartPos() + " in the reference data");
                    studyVariant.exclude();
                    continue;
                }

                // First preference: a reference variant with the same ID. Its alleles must be
                // identical to the study alleles or to their complement, otherwise the study
                // variant is excluded.
                for (GeneticVariant potentialRefVariant : potentialRefVariants) {
                    if (!potentialRefVariant.getVariantId().isSameId(studyVariant.getVariantId())) {
                        continue;
                    }
                    if (potentialRefVariant.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles()) || potentialRefVariant.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles().getComplement())) {
                        refVariant = potentialRefVariant;
                        continue;
                    }
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " Found variant with same ID but alleles are not comparable.");
                    studyVariant.exclude();
                    continue variants;
                }

                // Fallback: match on alleles only. This is accepted only when exactly one
                // reference variant at the position has comparable alleles.
                if (refVariant == null) {
                    for (GeneticVariant potentialRefVariant : potentialRefVariants) {
                        if (!potentialRefVariant.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles()) && !potentialRefVariant.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles().getComplement())) {
                            continue;
                        }
                        if (refVariant == null) {
                            refVariant = potentialRefVariant;
                            continue;
                        }
                        LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " because position maps to multiple variants with same alleles. Neither of these variants have same ID as this variant. No way to know what the corresponding variant is.");
                        studyVariant.exclude();
                        continue variants;
                    }
                    if (refVariant == null) {
                        if (keep) {
                            LOGGER.warn("No ref variant found for: " + studyVariant.getPrimaryVariantId() + " variant will not be aligned but will be written to output because of --keep");
                            continue;
                        }
                        LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + ". There is no variant in the reference at this position with same ID or same alleles");
                        studyVariant.exclude();
                        continue;
                    }
                }

                // Written as !(maf > 0) so that a NaN frequency is excluded as well.
                if (!(studyVariant.getMinorAlleleFrequency() > 0.0)) {
                    LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " has a MAF of 0 in the study data");
                    studyVariant.exclude();
                    continue;
                }
                if (!(refVariant.getMinorAlleleFrequency() > 0.0)) {
                    LOGGER.warn("Excluding variant: " + refVariant.getPrimaryVariantId() + " has a MAF of 0 in the reference data");
                    studyVariant.exclude();
                    continue;
                }

                if (updateId && !studyVariant.getPrimaryVariantId().equals(refVariant.getPrimaryVariantId())) {
                    snpUpdateWriter.append(studyVariant.getSequenceName());
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(String.valueOf(studyVariant.getStartPos()));
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(studyVariant.getPrimaryVariantId());
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(refVariant.getPrimaryVariantId());
                    snpUpdateWriter.append('\n');
                    LOGGER.debug("Updating primary variant ID of " + studyVariant.getPrimaryVariantId() + " to: " + refVariant.getPrimaryVariantId());
                    studyVariant.updatePrimaryId(refVariant.getPrimaryVariantId());
                }

                // For non A/T and non G/C SNPs the alleles themselves reveal the strand: the
                // match above guarantees identical or complementary alleles, so differing
                // alleles mean the study variant has to be swapped.
                if (!studyVariant.isAtOrGcSnp()) {
                    ++nonGcNonAtSnpsEncountered;
                    if (!studyVariant.getVariantAlleles().sameAlleles(refVariant.getVariantAlleles())) {
                        ++nonGcNonAtSnpsSwapped;
                        studyVariant.swap();
                        if (LOGGER.isDebugEnabled()) {
                            LOGGER.debug("Swapped strand of non AT and non GC SNP: " + studyVariant.getPrimaryVariantId() + " based on non ambiguous alleles. After swap study maf: " + studyVariant.getMinorAlleleFrequency() + " (" + studyVariant.getMinorAllele() + ") ref maf: " + refVariant.getMinorAlleleFrequency() + " (" + refVariant.getMinorAllele() + ")");
                        }
                    }
                }

                studyVariantList.add(studyVariant);
                refVariantList.add(refVariant);
            }
        } finally {
            // Close the ID update file even when pass 1 aborts with an exception, so the file
            // handle is never leaked.
            if (snpUpdateWriter != null) {
                snpUpdateWriter.close();
            }
        }

        LOGGER.info("Iteration 1 - Completed, non AT and non GC SNPs are aligned " + nonGcNonAtSnpsEncountered + " found and " + nonGcNonAtSnpsSwapped + " swapped");
        System.out.println("Iteration 1 - Completed, non AT and non GC SNPs are aligned " + nonGcNonAtSnpsEncountered + " found and " + nonGcNonAtSnpsSwapped + " swapped");

        // Both lists are sorted independently; the index-based pairing used below relies on
        // both sorts producing the same order for matched variants.
        Collections.sort(studyVariantList);
        Collections.sort(refVariantList);
        LOGGER.debug("Sorting of variant lists completed");

        if (ldCheck) {
            this.ldCheckNonAtGcSnps(minLdToIncludeAlign, minSnpsToAlignOn, flankSnpsToConsider, studyVariantList, refVariantList);
        } else {
            System.out.println("Iteration 2 - Skipped, non AT and non GC SNPs are not LD checked ");
            LOGGER.info("Iteration 2 - Skipped, non AT and non GC SNPs are not LD checked ");
        }

        this.alignAtGcSnps(minLdToIncludeAlign, minSnpsToAlignOn, flankSnpsToConsider, ldCheck, maxMafForMafAlignment, studyVariantList, refVariantList);
        return alignedStudyData;
    }

    /**
     * Pass 2: excludes non A/T, non G/C study SNPs whose haplotype comparison with the flanking
     * variants has too few usable variants or more negative than positive correlations.
     */
    private void ldCheckNonAtGcSnps(double minLdToIncludeAlign, double minSnpsToAlignOn, int flankSnpsToConsider, ArrayList<ModifiableGeneticVariant> studyVariantList, ArrayList<GeneticVariant> refVariantList) {
        int removedSnpsBasedOnLdCheck = 0;
        int iterationCounter = 0;
        for (int variantIndex = 0; variantIndex < studyVariantList.size(); ++variantIndex) {
            if (++iterationCounter % 10000 == 0) {
                LOGGER.info("Iteration 2 - " + iterationCounter + " variants processed");
                System.out.println("Iteration 2 - " + iterationCounter + " variants processed");
            }
            ModifiableGeneticVariant studyVariant = studyVariantList.get(variantIndex);
            GeneticVariant refVariant = refVariantList.get(variantIndex);
            if (studyVariant.isAtOrGcSnp()) {
                continue;
            }

            CorrelationResults hapCor = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
            if (hapCor.getTotalCor() < minSnpsToAlignOn) {
                // Not counted in removedSnpsBasedOnLdCheck: that counter only tracks
                // inconsistent LD patterns.
                LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " Not enough non AT / GC in LD to check LD pattern.");
                studyVariant.exclude();
                continue;
            }
            if (hapCor.getPosCor() >= hapCor.getNegCor()) {
                continue;
            }
            ++removedSnpsBasedOnLdCheck;
            LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " non AT / GC SNP with inconsistency in LD pattern \n\tStudy: " + studyVariant.getVariantAlleles() + " maf: " + studyVariant.getMinorAlleleFrequency() + " (" + studyVariant.getMinorAllele() + ")\n\tRef: " + refVariant.getVariantAlleles() + " maf: " + refVariant.getMinorAlleleFrequency() + " (" + refVariant.getMinorAllele() + ")\n\tTotal variants used: " + hapCor.getTotalCor() + " pos cor: " + hapCor.getPosCor());
            studyVariant.exclude();
        }
        LOGGER.info("Iteration 2 - Completed, non AT and non GC SNPs are LD checked");
        System.out.println("Iteration 2 - Completed, non AT and non GC SNPs are LD checked ");
        LOGGER.info("Excluded " + removedSnpsBasedOnLdCheck + " non AT and non GC SNPs based on inconsistencies in LD pattern");
    }

    /**
     * Pass 3: determines the strand of A/T and G/C study SNPs.
     *
     * <p>A SNP is swapped when the haplotype comparison yields more negative than positive
     * correlations. When too few flanking variants are usable, the SNP is either aligned on its
     * minor allele (only when {@code ldCheck} is off and both MAFs are at most
     * {@code maxMafForMafAlignment}) or excluded. With {@code ldCheck} on, a swapped SNP is
     * re-tested and excluded if the swap did not resolve the inconsistency.
     */
    private void alignAtGcSnps(double minLdToIncludeAlign, double minSnpsToAlignOn, int flankSnpsToConsider, boolean ldCheck, double maxMafForMafAlignment, ArrayList<ModifiableGeneticVariant> studyVariantList, ArrayList<GeneticVariant> refVariantList) {
        int iterationCounter = 0;
        int gcAtSnpsEncountered = 0;
        int swapBasedOnLdCount = 0;
        int removedSnpsBasedOnLdCheck = 0;

        for (int variantIndex = 0; variantIndex < studyVariantList.size(); ++variantIndex) {
            if (++iterationCounter % 10000 == 0) {
                LOGGER.info("Iteration 3 - " + iterationCounter + " variants processed (" + gcAtSnpsEncountered + " GC or AT SNPs checked)");
                System.out.println("Iteration 3 - " + iterationCounter + " variants processed (" + gcAtSnpsEncountered + " GC or AT SNPs checked)");
            }
            ModifiableGeneticVariant studyVariant = studyVariantList.get(variantIndex);
            GeneticVariant refVariant = refVariantList.get(variantIndex);
            if (!studyVariant.isAtOrGcSnp()) {
                continue;
            }
            ++gcAtSnpsEncountered;

            CorrelationResults hapCor = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
            boolean tooFewSnps = hapCor.getTotalCor() < minSnpsToAlignOn;

            if (tooFewSnps && !ldCheck && studyVariant.getMinorAlleleFrequency() <= maxMafForMafAlignment && refVariant.getMinorAlleleFrequency() <= maxMafForMafAlignment) {
                LOGGER.warn("Using minor allele to determine strand of: " + studyVariant.getPrimaryVariantId() + " study MAF: " + studyVariant.getMinorAlleleFrequency() + "(" + studyVariant.getMinorAllele() + ") reference MAF: " + refVariant.getMinorAlleleFrequency() + "(" + refVariant.getMinorAllele() + ")");
                // NOTE(review): identity comparison; presumably allele instances are shared
                // per allele value by the genotype library - confirm before switching to equals().
                if (studyVariant.getMinorAllele() == refVariant.getMinorAllele()) {
                    continue;
                }
                studyVariant.swap();
                // This counter also includes swaps decided on the minor allele.
                ++swapBasedOnLdCount;
                LOGGER.debug("Swapped " + studyVariant.getPrimaryVariantId() + " using the minor allele");
                continue;
            }

            if (tooFewSnps) {
                LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " Not enough non AT / GC in LD to assess strand based on LD. Pos cor " + hapCor.getPosCor() + " neg cor " + hapCor.getNegCor() + " MAF study: " + studyVariant.getMinorAlleleFrequency() + " MAF reference: " + refVariant.getMinorAlleleFrequency());
                studyVariant.exclude();
                continue;
            }

            if (hapCor.getPosCor() < hapCor.getNegCor()) {
                studyVariant.swap();
                ++swapBasedOnLdCount;
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Swapped strand of AT or GC SNP: " + studyVariant.getPrimaryVariantId() + " based on LD. After swap study maf: " + studyVariant.getMinorAlleleFrequency() + " (" + studyVariant.getMinorAllele() + ") ref maf: " + refVariant.getMinorAlleleFrequency() + " (" + refVariant.getMinorAllele() + ")");
                }
                if (!ldCheck) {
                    continue;
                }
                // Re-run the comparison on the swapped variant to verify the swap helped.
                CorrelationResults hapCorSwapped = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
                if (hapCorSwapped.getPosCor() >= hapCorSwapped.getNegCor()) {
                    continue;
                }
                ++removedSnpsBasedOnLdCheck;
                LOGGER.warn("Excluding variant: " + studyVariant.getPrimaryVariantId() + " GC or AT SNP with inconsistency in LD pattern that is not solved by swapping");
                studyVariant.exclude();
                continue;
            }

            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Did not swapped strand of AT or GC SNP: " + studyVariant.getPrimaryVariantId() + " based on LD. Study maf: " + studyVariant.getMinorAlleleFrequency() + " (" + studyVariant.getMinorAllele() + ") ref maf: " + refVariant.getMinorAlleleFrequency() + " (" + refVariant.getMinorAllele() + ")");
            }
        }

        if (ldCheck) {
            LOGGER.info("Iteration 3 - Completed, non AT and non GC SNPs are aligned and LD check afterwards");
            System.out.println("Iteration 3 - Completed, non AT and non GC SNPs are aligned and LD check afterwards");
        } else {
            LOGGER.info("Iteration 3 - Completed, non AT and non GC SNPs are aligned. Extra LD check skipped");
            System.out.println("Iteration 3 - Completed, non AT and non GC SNPs are aligned. Extra LD check skipped");
        }
        if (ldCheck) {
            LOGGER.info("Excluded " + removedSnpsBasedOnLdCheck + " AT or GC variants based on LD patterns");
            System.out.println("Excluded " + removedSnpsBasedOnLdCheck + " AT or GC variants based on LD patterns");
        }
        LOGGER.info("Swapped " + swapBasedOnLdCount + " out " + gcAtSnpsEncountered + " AT or GC variants based on LD patterns");
        System.out.println("Swapped " + swapBasedOnLdCount + " AT or GC variants based on LD patterns");
    }

    /**
     * Compares the haplotype frequencies of a variant with its neighbours between study and
     * reference.
     *
     * <p>Every neighbour in the window that lies on the same sequence, is not an A/T or G/C SNP
     * and reaches {@code minLdToIncludeAlignBase} r2 in both data sets contributes one count:
     * negative when the correlation between the study and reference haplotype frequencies is
     * below zero, positive otherwise (a correlation of exactly zero, or NaN, therefore counts as
     * positive).
     *
     * @param minLdToIncludeAlignBase minimum r2 required in both study and reference
     * @param flankSnpsToConsider window size on the sorted variant list
     * @param studyVariantList sorted study variants
     * @param refVariantList reference variants, paired by index with {@code studyVariantList}
     * @param variantIndex index of the variant under test in both lists
     * @param snpStudyVariant the study variant under test
     * @param refVariant the reference variant under test
     * @return the number of positively and negatively correlated neighbours
     */
    private CorrelationResults correlateHaplotypes(double minLdToIncludeAlignBase, int flankSnpsToConsider, ArrayList<ModifiableGeneticVariant> studyVariantList, ArrayList<GeneticVariant> refVariantList, int variantIndex, GeneticVariant snpStudyVariant, GeneticVariant refVariant) {
        int posCor = 0;
        int negCor = 0;
        // NOTE(review): the window is [variantIndex - flank, variantIndex + flank), i.e. up to
        // `flank` neighbours before but at most `flank - 1` after the variant. Left unchanged
        // because widening it would alter alignment results - confirm whether this is intended.
        for (int otherVariantIndex = Math.max(0, variantIndex - flankSnpsToConsider); otherVariantIndex < variantIndex + flankSnpsToConsider && otherVariantIndex < studyVariantList.size(); ++otherVariantIndex) {
            if (variantIndex == otherVariantIndex) {
                continue;
            }
            GeneticVariant otherSnpStudyVariant = studyVariantList.get(otherVariantIndex);
            if (!snpStudyVariant.getSequenceName().equals(otherSnpStudyVariant.getSequenceName()) || otherSnpStudyVariant.isAtOrGcSnp()) {
                continue;
            }
            GeneticVariant otherRefVariant = refVariantList.get(otherVariantIndex);

            Ld ldStudy;
            Ld ldRef;
            try {
                ldStudy = LdCalculator.calculateLd(snpStudyVariant, otherSnpStudyVariant);
                ldRef = LdCalculator.calculateLd(refVariant, otherRefVariant);
            } catch (LdCalculatorException e) {
                // Best effort: a failed LD calculation only removes this neighbour from the vote.
                LOGGER.warn("Error in LD calculation, skipping this comparison when comparing haplotype structure. Following error occurred: " + e.getMessage());
                continue;
            }

            // Written as !(r2 >= min) so that a NaN r2 is skipped as well.
            if (!(ldStudy.getR2() >= minLdToIncludeAlignBase) || !(ldRef.getR2() >= minLdToIncludeAlignBase)) {
                continue;
            }

            // TreeMaps order the frequencies by haplotype key so that the two arrays are built
            // in the same key order.
            TreeMap<String, Double> studyHapFreq = new TreeMap<String, Double>(ldStudy.getHaplotypesFreq());
            TreeMap<String, Double> refHapFreq = new TreeMap<String, Double>(ldRef.getHaplotypesFreq());
            double[] studyHapFreqArray = this.createDoubleArrayFromCollection(studyHapFreq.values());
            double[] refHapFreqArray = this.createDoubleArrayFromCollection(refHapFreq.values());
            double correlation = ArrayMath.correlation(studyHapFreqArray, refHapFreqArray);
            if (correlation < 0.0) {
                ++negCor;
            } else {
                ++posCor;
            }
        }
        return new CorrelationResults(posCor, negCor);
    }

    /**
     * Copies the values of a collection, in iteration order, into a primitive array.
     *
     * @param values the values to copy; must not contain {@code null} (unboxing would fail)
     * @return a new array with one element per value
     */
    private double[] createDoubleArrayFromCollection(Collection<Double> values) {
        double[] array = new double[values.size()];
        int i = 0;
        for (Double d : values) {
            array[i] = d;
            ++i;
        }
        return array;
    }

    /** Immutable pair of counts of positively and negatively correlated flanking variants. */
    private static class CorrelationResults {
        private final int posCor;
        private final int negCor;

        public CorrelationResults(int posCor, int negCor) {
            this.posCor = posCor;
            this.negCor = negCor;
        }

        /** @return the number of neighbours with a non-negative haplotype correlation */
        public int getPosCor() {
            return this.posCor;
        }

        /** @return the number of neighbours with a negative haplotype correlation */
        public int getNegCor() {
            return this.negCor;
        }

        /** @return the total number of neighbours that were counted */
        public int getTotalCor() {
            return this.getPosCor() + this.getNegCor();
        }
    }
}

