/*
 * Decompiled with CFR 0.152.
 */
package nl.umcg.deelenp.genotypeharmonizer;

import JSci.maths.ArrayMath;
import com.google.common.collect.Lists;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.TreeMap;
import nl.umcg.deelenp.genotypeharmonizer.GenotypeAlignmentException;
import nl.umcg.deelenp.genotypeharmonizer.GenotypeHarmonizer;
import nl.umcg.deelenp.genotypeharmonizer.SnpLogWriter;
import org.apache.log4j.Logger;
import org.molgenis.genotype.RandomAccessGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGeneticVariant;
import org.molgenis.genotype.modifiable.ModifiableGenotypeData;
import org.molgenis.genotype.modifiable.ModifiableGenotypeDataInMemory;
import org.molgenis.genotype.util.Ld;
import org.molgenis.genotype.util.LdCalculator;
import org.molgenis.genotype.util.LdCalculatorException;
import org.molgenis.genotype.variant.GeneticVariant;

/**
 * Aligns the variants of a study dataset to a reference dataset.
 *
 * <p>The work is done by {@link #alignToRef}, which makes three passes over the study variants:
 * <ol>
 *   <li>match each study variant to a reference variant at the same position, and swap
 *       variants that are not A/T or G/C SNPs when their alleles differ from the reference;</li>
 *   <li>(optional) exclude non A/T, non G/C SNPs whose haplotype pattern with flanking variants
 *       disagrees with the reference;</li>
 *   <li>decide for A/T and G/C SNPs whether to swap, using the haplotype pattern with flanking
 *       variants or, in some cases, the minor allele.</li>
 * </ol>
 */
public class Aligner {
    // The logger is registered under GenotypeHarmonizer.class, not Aligner.class. Kept as in the
    // original so messages stay in the same log category.
    private static final Logger LOGGER = Logger.getLogger(GenotypeHarmonizer.class);

    /** A progress line is printed to stdout every this many processed variants. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Aligns {@code study} to {@code ref} and returns a modifiable view of the study data in which
     * variants have been swapped, excluded, re-identified and/or given a new reference allele.
     *
     * @param study genotype data to align; wrapped in a {@link ModifiableGenotypeDataInMemory}
     * @param ref reference genotype data, queried by sequence name and start position
     * @param minLdToIncludeAlign minimum r2 a flanking variant must reach in both study and
     *     reference before it is counted in the haplotype comparison
     * @param minSnpsToAlignOn minimum number of counted flanking variants (positive plus negative
     *     correlations) required to take a decision based on LD
     * @param flankSnpsToConsider size of the window of neighbouring variants inspected per variant
     * @param ldCheck when {@code true}, pass 2 is run, swapped A/T and G/C SNPs are re-checked,
     *     and the minor-allele fallback of pass 3 is disabled
     * @param updateId when {@code true}, study primary IDs that differ from the reference are
     *     replaced and each replacement is written to {@code snpUpdateFile}
     * @param keep when {@code true}, study variants without a usable reference variant are left as
     *     they are instead of being excluded (they are still not used for alignment)
     * @param snpUpdateFile tab-separated output of ID updates; only opened when {@code updateId}
     * @param maxMafForMafAlignment upper bound on the minor allele frequency, in both datasets, for
     *     the minor-allele fallback of pass 3
     * @param snpLogFile file handed to the {@link SnpLogWriter} that records every action taken
     * @param matchRefAllele when {@code true}, the reference allele of each aligned study variant
     *     is set to that of its reference variant
     * @return the modifiable wrapper around {@code study} with all modifications applied
     * @throws GenotypeAlignmentException if the study data yields no variants at all, or none of
     *     them could be matched to the reference
     * @throws IOException if writing one of the output files fails
     * @throws LdCalculatorException declared for callers; LD errors raised while comparing
     *     haplotypes are caught and skipped inside {@code correlateHaplotypes}
     */
    public ModifiableGenotypeData alignToRef(RandomAccessGenotypeData study, RandomAccessGenotypeData ref, double minLdToIncludeAlign, double minSnpsToAlignOn, int flankSnpsToConsider, boolean ldCheck, boolean updateId, boolean keep, File snpUpdateFile, double maxMafForMafAlignment, File snpLogFile, boolean matchRefAllele) throws LdCalculatorException, IOException, GenotypeAlignmentException {
        ModifiableGenotypeDataInMemory aligendStudyData = new ModifiableGenotypeDataInMemory(study);
        // Parallel lists: index i of both lists holds a matched study/reference pair.
        ArrayList<ModifiableGeneticVariant> studyVariantList = new ArrayList<ModifiableGeneticVariant>();
        ArrayList<GeneticVariant> refVariantList = new ArrayList<GeneticVariant>();

        BufferedWriter snpUpdateWriter = null;
        SnpLogWriter snpLogWriter = null;
        // Both writers are released in the finally block so that no exit path (including the
        // exceptions thrown below) leaves a file handle open.
        try {
            if (updateId) {
                snpUpdateWriter = new BufferedWriter(new FileWriter(snpUpdateFile));
                snpUpdateWriter.append("chr\tpos\toriginalId\tnewId\n");
            }
            snpLogWriter = new SnpLogWriter(snpLogFile);

            // ---- Pass 1: match study variants to reference variants ----
            int iterationCounter = 0;
            int nonGcNonAtSnpsEncountered = 0;
            int nonGcNonAtSnpsSwapped = 0;
            studyVariants:
            for (ModifiableGeneticVariant studyVariant : aligendStudyData.getModifiableGeneticVariants()) {
                ++iterationCounter;
                if (iterationCounter % PROGRESS_INTERVAL == 0) {
                    System.out.println("Iteration 1 - " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(iterationCounter) + " variants processed");
                }

                if (!studyVariant.isMapped()) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No mapping");
                    studyVariant.exclude();
                    continue;
                }
                // A start position of 0 is treated the same as an unmapped variant.
                if (studyVariant.getStartPos() == 0) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No mapping");
                    studyVariant.exclude();
                    continue;
                }
                if (!studyVariant.isSnp()) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Not a SNP");
                    studyVariant.exclude();
                    continue;
                }
                if (!studyVariant.isBiallelic()) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Not biallelic");
                    studyVariant.exclude();
                    continue;
                }

                Iterator<GeneticVariant> potentialRefVariants = ref.getVariantsByPos(studyVariant.getSequenceName(), studyVariant.getStartPos()).iterator();
                if (!potentialRefVariants.hasNext()) {
                    if (!keep) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No variants at this position in reference");
                        studyVariant.exclude();
                    }
                    continue;
                }
                // Materialised because the candidates may have to be scanned twice.
                ArrayList<GeneticVariant> candidates = Lists.newArrayList(potentialRefVariants);

                // First preference: a reference variant with the same ID. Its alleles must be
                // identical or complementary, otherwise the study variant is dropped.
                GeneticVariant refVariant = null;
                for (GeneticVariant candidate : candidates) {
                    if (!candidate.getVariantId().isSameId(studyVariant.getVariantId())) {
                        continue;
                    }
                    if (!hasComparableAlleles(candidate, studyVariant)) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Found variant with same ID but alleles are not comparable");
                        studyVariant.exclude();
                        continue studyVariants;
                    }
                    refVariant = candidate;
                }

                // Second preference: exactly one reference variant with comparable alleles.
                if (refVariant == null) {
                    for (GeneticVariant candidate : candidates) {
                        if (!hasComparableAlleles(candidate, studyVariant)) {
                            continue;
                        }
                        if (refVariant != null) {
                            snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Position maps to multiple variants with same alleles. Neither of these variants have same ID as this variant. No way to know what the corresponding variant is");
                            studyVariant.exclude();
                            continue studyVariants;
                        }
                        refVariant = candidate;
                    }
                    if (refVariant == null) {
                        if (!keep) {
                            snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "No variant in the reference at this position with same ID or same alleles");
                            studyVariant.exclude();
                        }
                        continue;
                    }
                }

                if (updateId && !hasSamePrimaryId(studyVariant, refVariant)) {
                    // NOTE(review): if the reference ID is null while the study ID is not, the
                    // text "null" is written and the study ID is set to null - confirm intended.
                    snpUpdateWriter.append(studyVariant.getSequenceName());
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(String.valueOf(studyVariant.getStartPos()));
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(studyVariant.getPrimaryVariantId());
                    snpUpdateWriter.append('\t');
                    snpUpdateWriter.append(refVariant.getPrimaryVariantId());
                    snpUpdateWriter.append('\n');
                    LOGGER.debug("Updating primary variant ID of " + studyVariant.getPrimaryVariantId() + " to: " + refVariant.getPrimaryVariantId());
                    studyVariant.updatePrimaryId(refVariant.getPrimaryVariantId());
                }

                // Variants that are not A/T or G/C SNPs are resolved right away from the alleles;
                // A/T and G/C SNPs are deferred to pass 3.
                if (!studyVariant.isAtOrGcSnp()) {
                    ++nonGcNonAtSnpsEncountered;
                    if (!studyVariant.getVariantAlleles().sameAlleles(refVariant.getVariantAlleles())) {
                        ++nonGcNonAtSnpsSwapped;
                        studyVariant.swap();
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, "");
                    }
                    if (matchRefAllele) {
                        studyVariant.updateRefAllele(refVariant.getRefAllele());
                    }
                }

                studyVariantList.add(studyVariant);
                refVariantList.add(refVariant);
            }

            // The ID update file is complete after pass 1; close it now, as the original did.
            if (snpUpdateWriter != null) {
                snpUpdateWriter.close();
                snpUpdateWriter = null;
            }

            if (iterationCounter == 0) {
                throw new GenotypeAlignmentException("No variants where found in the input genotype data. Please check your variant filter options");
            }

            String passOneSummary = "Iteration 1 - Completed, non A/T and non G/C SNPs are aligned " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(nonGcNonAtSnpsEncountered) + " found and " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(nonGcNonAtSnpsSwapped) + " swapped";
            LOGGER.info(passOneSummary);
            System.out.println(passOneSummary);

            if (studyVariantList.isEmpty()) {
                throw new GenotypeAlignmentException("Zero of the input variants found in reference set. Are both datasets the same genome build? Perhapse you need use --forceChr.");
            }

            // NOTE(review): the two lists are sorted independently and are paired by index
            // afterwards; this presumably relies on both natural orderings agreeing - confirm.
            Collections.sort(studyVariantList);
            Collections.sort(refVariantList);
            LOGGER.debug("Sorting of variant lists completed");

            // ---- Pass 2: optional LD check of non A/T, non G/C SNPs ----
            int removedSnpsBasedOnLdCheck = 0;
            if (ldCheck) {
                iterationCounter = 0;
                for (int variantIndex = 0; variantIndex < studyVariantList.size(); ++variantIndex) {
                    ++iterationCounter;
                    if (iterationCounter % PROGRESS_INTERVAL == 0) {
                        System.out.println("Iteration 2 - " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(iterationCounter) + " variants processed");
                    }
                    ModifiableGeneticVariant studyVariant = studyVariantList.get(variantIndex);
                    GeneticVariant refVariant = refVariantList.get(variantIndex);
                    if (studyVariant.isAtOrGcSnp()) {
                        continue;
                    }
                    CorrelationResults hapCor = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
                    if (hapCor.getTotalCor() < minSnpsToAlignOn) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Not enough non A/T or G/C in LD to check LD pattern");
                        studyVariant.exclude();
                    } else if (hapCor.getPosCor() < hapCor.getNegCor()) {
                        ++removedSnpsBasedOnLdCheck;
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "Non A/T or G/C SNP with inconsistency in LD pattern");
                        studyVariant.exclude();
                    }
                }
                LOGGER.info("Iteration 2 - Completed, non A/T and non G/C SNPs are LD checked");
                System.out.println("Iteration 2 - Completed, non A/T and non G/C SNPs are LD checked ");
                LOGGER.info("Excluded " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(removedSnpsBasedOnLdCheck) + " non A/T and non G/C SNPs based on inconsistencies in LD pattern");
            } else {
                System.out.println("Iteration 2 - Skipped, non A/T and non G/C SNPs are not LD checked ");
                LOGGER.info("Iteration 2 - Skipped, non A/T and non G/C SNPs are not LD checked ");
            }

            // ---- Pass 3: resolve A/T and G/C SNPs ----
            iterationCounter = 0;
            int gcAtSnpsEncountered = 0;
            int swapBasedOnLdCount = 0;
            removedSnpsBasedOnLdCheck = 0;
            for (int variantIndex = 0; variantIndex < studyVariantList.size(); ++variantIndex) {
                ++iterationCounter;
                if (iterationCounter % PROGRESS_INTERVAL == 0) {
                    System.out.println("Iteration 3 - " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(iterationCounter) + " variants processed (" + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(gcAtSnpsEncountered) + " G/C or A/T SNPs checked)");
                }
                ModifiableGeneticVariant studyVariant = studyVariantList.get(variantIndex);
                GeneticVariant refVariant = refVariantList.get(variantIndex);
                if (!studyVariant.isAtOrGcSnp()) {
                    continue;
                }
                ++gcAtSnpsEncountered;

                CorrelationResults hapCor = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
                boolean tooFewSnps = hapCor.getTotalCor() < minSnpsToAlignOn;
                boolean tiedCorrelations = hapCor.getPosCor() == hapCor.getNegCor();

                if ((tooFewSnps || tiedCorrelations) && !ldCheck && studyVariant.getMinorAlleleFrequency() <= maxMafForMafAlignment && refVariant.getMinorAlleleFrequency() <= maxMafForMafAlignment) {
                    // LD is inconclusive: fall back on comparing the minor alleles.
                    // NOTE(review): identity comparison kept from the original; presumably allele
                    // instances are canonical so that == is sufficient - confirm.
                    if (studyVariant.getMinorAllele() != refVariant.getMinorAllele()) {
                        studyVariant.swap();
                        ++swapBasedOnLdCount;
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, describeMinorAlleles(studyVariant, refVariant));
                    } else if (LOGGER.isDebugEnabled()) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.MAINTAINED, describeMinorAlleles(studyVariant, refVariant));
                    }
                } else if (tooFewSnps) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, describeLd("Not enough non A/T or non G/C in LD to assess strand based on LD", hapCor, studyVariant, refVariant));
                    studyVariant.exclude();
                    continue;
                } else if (tiedCorrelations) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, describeLd("Equal number of positive and negative correlations", hapCor, studyVariant, refVariant));
                    studyVariant.exclude();
                    continue;
                } else if (hapCor.getPosCor() < hapCor.getNegCor()) {
                    studyVariant.swap();
                    ++swapBasedOnLdCount;
                    // The message is built after the swap, so it reports the swapped variant.
                    if (LOGGER.isDebugEnabled()) {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, describeLd("Based on LD", hapCor, studyVariant, refVariant));
                    } else {
                        snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.SWAPPED, "Based on LD");
                    }
                    if (ldCheck) {
                        // Re-evaluate after swapping; exclude if the swap did not fix the pattern.
                        CorrelationResults hapCorSwapped = this.correlateHaplotypes(minLdToIncludeAlign, flankSnpsToConsider, studyVariantList, refVariantList, variantIndex, studyVariant, refVariant);
                        if (hapCorSwapped.getPosCor() < hapCorSwapped.getNegCor()) {
                            ++removedSnpsBasedOnLdCheck;
                            snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.EXCLUDED, "G/C or A/T SNP with inconsistency in LD pattern that is not solved by swapping");
                            studyVariant.exclude();
                            continue;
                        }
                    }
                } else if (LOGGER.isDebugEnabled()) {
                    snpLogWriter.addToLog(studyVariant, SnpLogWriter.Actions.MAINTAINED, describeLd("Based on LD", hapCor, studyVariant, refVariant));
                }

                if (matchRefAllele) {
                    studyVariant.updateRefAllele(refVariant.getRefAllele());
                }
            }

            if (ldCheck) {
                LOGGER.info("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned and LD check afterwards");
                System.out.println("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned and LD check afterwards");
                LOGGER.info("Excluded " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(removedSnpsBasedOnLdCheck) + " A/T or G/C variants based on LD patterns");
                System.out.println("Excluded " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(removedSnpsBasedOnLdCheck) + " A/T or G/C variants based on LD patterns");
            } else {
                LOGGER.info("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned. Extra LD check skipped");
                System.out.println("Iteration 3 - Completed, non A/T and non G/C SNPs are aligned. Extra LD check skipped");
            }
            LOGGER.info("Swapped " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(swapBasedOnLdCount) + " out of " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(gcAtSnpsEncountered) + " A/T or G/C variants based on LD patterns");
            System.out.println("Swapped " + GenotypeHarmonizer.DEFAULT_NUMBER_FORMATTER.format(swapBasedOnLdCount) + " A/T or G/C variants based on LD patterns");
            return aligendStudyData;
        } finally {
            try {
                if (snpUpdateWriter != null) {
                    snpUpdateWriter.close();
                }
            } finally {
                if (snpLogWriter != null) {
                    snpLogWriter.close();
                }
            }
        }
    }

    /** Returns whether the alleles of both variants are the same, directly or after complementing. */
    private static boolean hasComparableAlleles(GeneticVariant refCandidate, GeneticVariant studyVariant) {
        return refCandidate.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles())
                || refCandidate.getVariantAlleles().sameAlleles(studyVariant.getVariantAlleles().getComplement());
    }

    /** Returns whether both primary IDs are null, or both are non-null and equal. */
    private static boolean hasSamePrimaryId(GeneticVariant studyVariant, GeneticVariant refVariant) {
        if (studyVariant.getPrimaryVariantId() == null || refVariant.getPrimaryVariantId() == null) {
            return studyVariant.getPrimaryVariantId() == null && refVariant.getPrimaryVariantId() == null;
        }
        return studyVariant.getPrimaryVariantId().equals(refVariant.getPrimaryVariantId());
    }

    /** Builds the log message used when a decision was taken on the minor alleles. */
    private static String describeMinorAlleles(GeneticVariant studyVariant, GeneticVariant refVariant) {
        return "Based on minor allele, study MAF: " + studyVariant.getMinorAlleleFrequency() + "(" + studyVariant.getMinorAllele() + ") reference MAF: " + refVariant.getMinorAlleleFrequency() + "(" + refVariant.getMinorAllele() + ")";
    }

    /** Builds a log message of the form "reason. Pos cor .. neg cor .. MAF study: .. MAF reference: ..". */
    private static String describeLd(String reason, CorrelationResults hapCor, GeneticVariant studyVariant, GeneticVariant refVariant) {
        return reason + ". Pos cor " + hapCor.getPosCor() + " neg cor " + hapCor.getNegCor() + " MAF study: " + studyVariant.getMinorAlleleFrequency() + "(" + studyVariant.getMinorAllele() + ") MAF reference: " + refVariant.getMinorAlleleFrequency() + "(" + refVariant.getMinorAllele() + ")";
    }

    /**
     * Compares, for one study/reference variant pair, the haplotype frequencies with flanking
     * variants in the study against those in the reference.
     *
     * <p>For every eligible flanking variant the correlation between the study and reference
     * haplotype frequency vectors is computed; the result counts how many of those correlations
     * are positive and how many are negative. Flanking variants are skipped when they are on
     * another sequence, are A/T or G/C SNPs, fail the LD calculation, do not reach
     * {@code minLdToIncludeAlignBase} in both datasets, or have zero variance in either vector.
     *
     * @param minLdToIncludeAlignBase minimum r2 required in both study and reference
     * @param flankSnpsToConsider window size around {@code variantIndex}
     * @param studyVariantList study variants, index-paired with {@code refVariantList}
     * @param refVariantList reference variants
     * @param variantIndex index of the variant under test in both lists
     * @param snpStudyVariant the study variant under test
     * @param refVariant the matching reference variant
     * @return the number of positive and negative correlations found
     */
    private CorrelationResults correlateHaplotypes(double minLdToIncludeAlignBase, int flankSnpsToConsider, ArrayList<ModifiableGeneticVariant> studyVariantList, ArrayList<GeneticVariant> refVariantList, int variantIndex, GeneticVariant snpStudyVariant, GeneticVariant refVariant) {
        int posCor = 0;
        int negCor = 0;
        // NOTE(review): the upper bound is exclusive, so the window holds flankSnpsToConsider
        // variants before the index but only flankSnpsToConsider - 1 after it. Left unchanged
        // because altering it would change alignment results - confirm whether this is intended.
        int firstIndex = Math.max(0, variantIndex - flankSnpsToConsider);
        int endIndex = Math.min(variantIndex + flankSnpsToConsider, studyVariantList.size());
        for (int otherVariantIndex = firstIndex; otherVariantIndex < endIndex; ++otherVariantIndex) {
            if (otherVariantIndex == variantIndex) {
                continue;
            }
            GeneticVariant otherSnpStudyVariant = studyVariantList.get(otherVariantIndex);
            if (!snpStudyVariant.getSequenceName().equals(otherSnpStudyVariant.getSequenceName()) || otherSnpStudyVariant.isAtOrGcSnp()) {
                continue;
            }
            GeneticVariant otherRefVariant = refVariantList.get(otherVariantIndex);

            Ld ldStudy;
            Ld ldRef;
            try {
                ldStudy = LdCalculator.calculateLd(snpStudyVariant, otherSnpStudyVariant);
                ldRef = LdCalculator.calculateLd(refVariant, otherRefVariant);
            } catch (LdCalculatorException e) {
                // Best effort: a failed LD calculation only removes this neighbour from the count.
                LOGGER.debug("Error in LD calculation, skipping this comparison when comparing haplotype structure. Following error occurred: " + e.getMessage());
                continue;
            }

            // Written as !(x >= min) so that NaN values never pass the threshold.
            if (Double.isNaN(ldStudy.getR2()) || Double.isNaN(ldRef.getR2()) || !(ldStudy.getR2() >= minLdToIncludeAlignBase) || !(ldRef.getR2() >= minLdToIncludeAlignBase)) {
                continue;
            }

            // TreeMaps iterate in key order, so both arrays are laid out by sorted haplotype key.
            // NOTE(review): presumably both maps carry the same set of keys - confirm.
            TreeMap<String, Double> studyHapFreq = new TreeMap<String, Double>(ldStudy.getHaplotypesFreq());
            TreeMap<String, Double> refHapFreq = new TreeMap<String, Double>(ldRef.getHaplotypesFreq());
            double[] studyHapFreqArray = this.createDoubleArrayFromCollection(studyHapFreq.values());
            double[] refHapFreqArray = this.createDoubleArrayFromCollection(refHapFreq.values());

            double studyHapVar = ArrayMath.variance(studyHapFreqArray);
            double refHapVar = ArrayMath.variance(refHapFreqArray);
            double denom = Math.sqrt(studyHapVar * refHapVar);
            if (denom == 0.0) {
                // No variance in one of the vectors: the correlation is undefined.
                continue;
            }
            double correlation = ArrayMath.covariance(studyHapFreqArray, refHapFreqArray) / denom;
            if (correlation < 0.0) {
                ++negCor;
            } else if (correlation > 0.0) {
                ++posCor;
            }
            // A correlation of exactly 0 (or NaN) is counted as neither.
        }
        return new CorrelationResults(posCor, negCor);
    }

    /**
     * Copies the values of a collection into a primitive array, in iteration order.
     *
     * @param values the values to copy; must not contain {@code null} (unboxing would fail)
     * @return a new array with one entry per element of {@code values}
     */
    private double[] createDoubleArrayFromCollection(Collection<Double> values) {
        double[] array = new double[values.size()];
        int i = 0;
        for (Double value : values) {
            array[i++] = value;
        }
        return array;
    }

    /** Immutable pair of counters: positive and negative haplotype correlations. */
    private static class CorrelationResults {
        private final int posCor;
        private final int negCor;

        public CorrelationResults(int posCor, int negCor) {
            this.posCor = posCor;
            this.negCor = negCor;
        }

        /** @return the number of positive correlations */
        public int getPosCor() {
            return this.posCor;
        }

        /** @return the number of negative correlations */
        public int getNegCor() {
            return this.negCor;
        }

        /** @return the sum of positive and negative correlations */
        public int getTotalCor() {
            return this.posCor + this.negCor;
        }
    }
}

