/*
 * Decompiled with CFR 0.152.
 */
package eqtlmappingpipeline.qcpca;

import JSci.maths.ArrayMath;
import Jama.EigenvalueDecomposition;
import cern.jet.random.tdouble.StudentT;
import cern.jet.random.tdouble.engine.DRand;
import cern.jet.random.tdouble.engine.DoubleRandomEngine;
import eqtlmappingpipeline.graphics.ScatterPlot;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.SortableSNP;
import umcg.genetica.io.Gpio;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDatasetSettings;
import umcg.genetica.io.trityper.util.DetermineLD;
import umcg.genetica.math.PCA;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Descriptives;
import umcg.genetica.math.stats.Log2Transform;
import umcg.genetica.math.stats.QuantileNormalization;

public class QCPCA {
    private boolean useCorrelationMatrix = false;
    private boolean LDpruning = false;

    /**
     * Runs the genotype-versus-expression PCA quality-control analysis and writes all results
     * below {@code outputdirectory}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Load the dataset described by the arguments and choose the SNP set: LD-threshold pruning
     *       when no list file is given and {@code LDpruning} is set, the SNPs named in
     *       {@code prunedSNPListFile} when one is given, and the regression/PCA pruning otherwise.</li>
     *   <li>Build a samples x SNPs genotype matrix and derive a samples x samples similarity matrix
     *       from it (correlation when {@code useCorrelationMatrix} is set, otherwise an
     *       identity-by-state score); written to {@code snpcorrmat.txt}.</li>
     *   <li>Eigen-decompose that matrix; write {@code PCAOverSamplesEigenvalues.txt.gz} and
     *       {@code EigenVectors-SNPs.txt} and draw scatter plots with the {@code SNP-} prefix.</li>
     *   <li>Quantile-normalize and log2-transform the expression matrix, centre each probe,
     *       standardize each sample, compute the sample x sample covariance matrix, eigen-decompose
     *       it and draw scatter plots with the {@code Exp-} prefix.</li>
     *   <li>Correlate genetic eigenvectors with expression eigenvectors (at most the first 100 of
     *       each), plot the pairs passing a Bonferroni threshold and write
     *       {@code SNP-PCvsExp-PC.txt}.</li>
     * </ol>
     *
     * <p>Any exception is caught and its stack trace printed; nothing is rethrown to the caller.
     *
     * @param expressionLoc      stored in {@code settings.expressionLocation}
     * @param expressionPlatform stored in {@code settings.expressionplatform}
     * @param genotypeLoc        stored in {@code settings.genotypeLocation}
     * @param gte                stored in {@code settings.genotypeToExpressionCoupling}
     * @param probeannotation    stored in {@code settings.probeannotation}
     * @param outputdirectory    output directory; a trailing "/" is appended when missing
     * @param prunedSNPListFile  file with a pre-pruned SNP list, or {@code null} to prune here
     */
    public void run(String expressionLoc, String expressionPlatform, String genotypeLoc, String gte, String probeannotation, String outputdirectory, String prunedSNPListFile) {
        try {
            if (!outputdirectory.endsWith("/")) {
                outputdirectory = outputdirectory + "/";
            }
            Gpio.createDir(outputdirectory);
            TriTyperGeneticalGenomicsDatasetSettings settings = new TriTyperGeneticalGenomicsDatasetSettings();
            settings.expressionLocation = expressionLoc;
            settings.expressionplatform = expressionPlatform;
            settings.genotypeLocation = genotypeLoc;
            settings.genotypeToExpressionCoupling = gte;
            settings.probeannotation = probeannotation;
            TriTyperGeneticalGenomicsDataset ds = new TriTyperGeneticalGenomicsDataset(settings);
            SNPLoader loader = ds.getGenotypeData().createSNPLoader();
            int numsamples = ds.getTotalGGSamples();
            int[] indWGA = ds.getExpressionToGenotypeIdArray();
            ArrayList<Integer> ldSNPs = prunedSNPListFile == null && this.LDpruning ? this.pruneSNPsByLDThreshold(ds, loader) : (prunedSNPListFile != null ? this.loadPrunedSNPListFromFile(prunedSNPListFile, ds) : this.pruneSNPsByMLRegressionPCA(ds, loader));

            // ---- Genotype matrix: datatmp[sample][snp] ----
            System.out.println("Copying data to array");
            ProgressBar pb = new ProgressBar(ldSNPs.size());
            double[][] datatmp = new double[numsamples][ldSNPs.size()];
            HashSet<Integer> snpsWoData = new HashSet<Integer>();
            for (int snp = 0; snp < ldSNPs.size(); ++snp) {
                Integer snpID = ldSNPs.get(snp);
                SNP snpObj = ds.getGenotypeData().getSNPObject(snpID);
                loader.loadGenotypes(snpObj);
                if (loader.hasDosageInformation()) {
                    loader.loadDosage(snpObj);
                }
                double[] snpdata = this.getSNPData(loader, numsamples, indWGA, snpObj, false);
                if (snpdata != null) {
                    for (int sample = 0; sample < snpdata.length; ++sample) {
                        datatmp[sample][snp] = snpdata[sample];
                    }
                } else {
                    snpsWoData.add(snp);
                }
                pb.iterate();
            }
            pb.close();

            double[][] datafinal;
            if (snpsWoData.size() > 0) {
                System.out.println("Detected " + snpsWoData.size() + " SNPs not passing QC, out of " + ldSNPs.size());
                int numsnpswdata = ldSNPs.size() - snpsWoData.size();
                datafinal = new double[numsamples][numsnpswdata];
                int snpcounter = 0;
                for (int snp = 0; snp < ldSNPs.size(); ++snp) {
                    if (snpsWoData.contains(snp)) {
                        continue;
                    }
                    // Compact the retained columns into datafinal (the destination must not be datatmp,
                    // otherwise datafinal would stay all zeros).
                    for (int sample = 0; sample < datatmp.length; ++sample) {
                        datafinal[sample][snpcounter] = datatmp[sample][snp];
                    }
                    ++snpcounter;
                }
            } else {
                datafinal = datatmp;
            }

            // ---- Sample x sample genotype similarity ----
            double[][] correlationmatrix = new double[numsamples][numsamples];
            if (this.useCorrelationMatrix) {
                correlationmatrix = this.calculatecorrelationmatrix(datafinal, true);
            } else {
                // NOTE(review): pb was closed above and is iterated again here; confirm ProgressBar
                // tolerates this or give this loop its own bar.
                for (int first = 0; first < numsamples; ++first) {
                    double[] snpsi = datafinal[first];
                    for (int second = first + 1; second < numsamples; ++second) {
                        double[] snpsj = datafinal[second];
                        int nrSNPsWithGenotypeDataAvailableForBothSamples = 0;
                        double ibsCount = 0.0;
                        for (int s = 0; s < snpsi.length; ++s) {
                            // -1 marks a missing genotype call; such SNPs are left out of the pair's score.
                            if (snpsi[s] == -1.0 || snpsj[s] == -1.0) {
                                continue;
                            }
                            if (snpsi[s] == snpsj[s]) {
                                ibsCount += 1.0;
                            } else if (Math.abs(snpsi[s] - snpsj[s]) == 1.0) {
                                ibsCount += 0.5;
                            }
                            ++nrSNPsWithGenotypeDataAvailableForBothSamples;
                        }
                        double corr = ibsCount / (double)nrSNPsWithGenotypeDataAvailableForBothSamples;
                        correlationmatrix[first][second] = corr;
                        correlationmatrix[second][first] = corr;
                        pb.iterate();
                    }
                    correlationmatrix[first][first] = 1.0;
                }
            }

            TextFile corMat = new TextFile(outputdirectory + "snpcorrmat.txt", true);
            for (int row = 0; row < correlationmatrix.length; ++row) {
                StringBuilder line = new StringBuilder();
                for (int col = 0; col < correlationmatrix.length; ++col) {
                    line.append("\t").append(correlationmatrix[row][col]);
                }
                corMat.write(line.append("\n").toString());
            }
            corMat.close();
            pb.close();

            // Console preview of the top-left corner; bounded so small datasets do not overrun the matrix.
            int previewSize = Math.min(10, correlationmatrix.length);
            for (int row = 0; row < previewSize; ++row) {
                StringBuilder line = new StringBuilder();
                for (int col = 0; col < previewSize; ++col) {
                    line.append("\t").append(correlationmatrix[row][col]);
                }
                System.out.println(line.toString());
            }

            // ---- Genetic PCA ----
            // NOTE(review): the PC index is passed as 1 here but starts at 0 in the eigenvalue loop
            // below; confirm whether the PCA helpers are 0- or 1-based.
            System.out.println("Performing eigenvalue decomposition");
            EigenvalueDecomposition eig = PCA.eigenValueDecomposition(correlationmatrix);
            System.out.println("Getting eigenvalues");
            double[] eigenValues = PCA.getRealEigenvalues(eig);
            System.out.println("Getting eigenvariance");
            double genVarPC1 = PCA.getEigenValueVar(eigenValues, 1);
            System.out.println("Getting eigenvector");
            double[] PC1GenEigenVector = PCA.getEigenVector(eig, eigenValues, 1);
            System.out.println("Getting eigenvector");
            double[] PC2GenEigenVector = PCA.getEigenVector(eig, eigenValues, 2);
            TextFile eigenvectorsout = new TextFile(outputdirectory + "PCAOverSamplesEigenvalues.txt.gz", true);
            double cumVarPCA = 0.0;
            for (int pca = 0; pca < numsamples; ++pca) {
                double varPCA = PCA.getEigenValueVar(eigenValues, pca);
                int pcaNr = pca + 1;
                cumVarPCA += varPCA;
                eigenvectorsout.write(pcaNr + "\t" + varPCA + "\t" + cumVarPCA + "\n");
                System.out.println("PCA:\t" + pcaNr + "\t" + eigenValues[eigenValues.length - 1 - pca] + "\t" + cumVarPCA);
            }
            eigenvectorsout.close();
            System.out.println("Done");
            System.out.println(genVarPC1);
            for (int pc = 1; pc < 11; ++pc) {
                ScatterPlot scat = new ScatterPlot();
                scat.draw(PCA.getEigenVector(eig, eigenValues, pc), PCA.getEigenVector(eig, eigenValues, pc + 1), "PC" + pc, "PC" + (pc + 1), "Genetic Eigenvalues", outputdirectory + "SNP-");
            }

            // Fetch every eigenvector once instead of once per output cell.
            double[][] genEigenVectors = new double[numsamples][];
            for (int pc = 1; pc <= numsamples - 1; ++pc) {
                genEigenVectors[pc] = PCA.getEigenVector(eig, eigenValues, pc);
            }
            TextFile out = new TextFile(outputdirectory + "EigenVectors-SNPs.txt", true);
            for (int sample = 0; sample < numsamples; ++sample) {
                StringBuilder probeCoefficients = new StringBuilder();
                for (int pc = 1; pc <= numsamples - 1; ++pc) {
                    probeCoefficients.append("\t").append(genEigenVectors[pc][sample]);
                }
                out.write(ds.getExpressionData().getIndividuals()[sample] + "\t" + ds.getGenotypeData().getIndividuals()[indWGA[sample]] + probeCoefficients.toString() + "\n");
            }
            out.close();

            // ---- Expression matrix: rawData[probe][sample] ----
            double[][] rawData = ds.getExpressionData().getMatrix();
            DoubleMatrixDataset dataset = new DoubleMatrixDataset(rawData.length, rawData[rawData.length - 1].length);
            QuantileNormalization.quantilenormalize(rawData);
            Log2Transform.log2transform(rawData);
            dataset.rowObjects = Arrays.asList(ds.getExpressionData().getProbes());
            dataset.colObjects = Arrays.asList(ds.getExpressionData().getIndividuals());
            dataset.rawData = rawData;
            int nrProbes = dataset.rowObjects.size();
            int nrSamples = dataset.colObjects.size();
            System.out.println("Standardizing probe mean and standard deviation");
            // Despite the message, probes are only mean-centred; no scaling is applied per probe.
            for (int p = 0; p < nrProbes; ++p) {
                double mean = Descriptives.mean(rawData[p]);
                for (int s = 0; s < nrSamples; ++s) {
                    rawData[p][s] = rawData[p][s] - mean;
                }
            }
            System.out.println("- Standardizing sample mean and standard deviation");
            for (int s = 0; s < nrSamples; ++s) {
                double[] vals = new double[nrProbes];
                for (int p = 0; p < nrProbes; ++p) {
                    vals[p] = dataset.rawData[p][s];
                }
                double mean = Descriptives.mean(vals);
                // The variance is taken around the mean BEFORE centring; computing it on already
                // centred values around the old mean would inflate it.
                double stdev = Math.sqrt(Descriptives.variance(vals, mean));
                for (int p = 0; p < nrProbes; ++p) {
                    dataset.rawData[p][s] = (vals[p] - mean) / stdev;
                }
            }
            System.out.print("- Calculating correlations between all " + nrSamples + " samples: ");
            double[][] correlationMatrix = new double[nrSamples][nrSamples];
            double probeCountMinusOne = nrProbes - 1;
            ProgressBar pv2 = new ProgressBar(nrSamples * nrSamples);
            for (int f = 0; f < nrSamples; ++f) {
                for (int g = f; g < nrSamples; ++g) {
                    double covarianceInterim = 0.0;
                    for (int p = 0; p < nrProbes; ++p) {
                        covarianceInterim += dataset.rawData[p][f] * dataset.rawData[p][g];
                    }
                    double covariance = covarianceInterim / probeCountMinusOne;
                    correlationMatrix[f][g] = covariance;
                    correlationMatrix[g][f] = covariance;
                    pv2.iterate();
                    pv2.iterate();
                }
            }
            pv2.close();
            System.out.println("100%");

            // ---- Expression PCA: decompose the EXPRESSION matrix computed just above ----
            System.out.println("Performing eigenvalue decomposition");
            EigenvalueDecomposition eigExp = PCA.eigenValueDecomposition(correlationMatrix);
            System.out.println("Getting eigenvalues");
            double[] eigenValuesExp = PCA.getRealEigenvalues(eigExp);
            System.out.println("Getting eigenvariance");
            double expVarPC1 = PCA.getEigenValueVar(eigenValuesExp, 1);
            System.out.println("Getting eigenvector");
            double[] PC1ExpEigenVector = PCA.getEigenVector(eigExp, eigenValuesExp, 1);
            System.out.println("Getting eigenvector");
            double[] PC2ExpEigenVector = PCA.getEigenVector(eigExp, eigenValuesExp, 2);
            double[][] correlationmatrix2 = new double[numsamples][numsamples];
            pb = new ProgressBar(numsamples * numsamples);
            pb.print();
            for (int pc = 1; pc < 11; ++pc) {
                ScatterPlot scat = new ScatterPlot();
                scat.draw(PCA.getEigenVector(eigExp, eigenValuesExp, pc), PCA.getEigenVector(eigExp, eigenValuesExp, pc + 1), "PC" + pc, "PC" + (pc + 1), "Expression Eigenvalues", outputdirectory + "Exp-");
            }

            // ---- Genetic PCs versus expression PCs ----
            // Only the first 100 components are compared; the sample count itself stays untouched
            // because it still determines the degrees of freedom of each test.
            int numPcs = Math.min(numsamples, 100);
            double bonferroni = 0.05 / (double)(numPcs * numPcs);
            System.out.println("Determining significant correlations between genetic PCs and expression PCs");
            System.out.println("Threshold: " + bonferroni);
            for (int pc = 1; pc <= numPcs - 1; ++pc) {
                double[] genEig = PCA.getEigenVector(eig, eigenValues, pc);
                for (int pc2 = pc; pc2 <= numPcs - 1; ++pc2) {
                    double[] expEig = PCA.getEigenVector(eigExp, eigenValuesExp, pc2);
                    double corr = ArrayMath.correlation((double[])genEig, (double[])expEig);
                    correlationmatrix2[pc][pc2] = corr;
                    correlationmatrix2[pc2][pc] = corr;
                    ScatterPlot scat = new ScatterPlot();
                    // Each eigenvector has one entry per sample, so the test has (samples - 2) df.
                    int df = numsamples - 2;
                    StudentT tDistColt = new StudentT((double)df, (DoubleRandomEngine)new DRand());
                    double t = corr / Math.sqrt((1.0 - corr * corr) / (double)df);
                    // Two-sided p-value: the sign of an eigenvector is arbitrary, so positive and
                    // negative correlations must be treated alike.
                    double tTestPValue1 = 2.0 * tDistColt.cdf(-Math.abs(t));
                    if (tTestPValue1 < bonferroni) {
                        System.out.println(corr + "\t" + t + "\t" + tTestPValue1);
                        scat.draw(genEig, expEig, "SNP" + pc, "EXP" + pc2, "SNP vs Gene expression PC" + pc + ", corr: " + corr + ", pval: " + tTestPValue1, outputdirectory + "SNPvsEXP");
                    }
                    pb.iterate();
                }
            }
            pb.close();
            out = new TextFile(outputdirectory + "SNP-PCvsExp-PC.txt", true);
            for (int row = 0; row < numPcs; ++row) {
                StringBuilder probeCoefficients = new StringBuilder();
                for (int pc = 1; pc <= numPcs - 1; ++pc) {
                    probeCoefficients.append("\t").append(correlationmatrix2[row][pc]);
                }
                out.write(row + probeCoefficients.toString() + "\n");
            }
            out.close();
        }
        catch (Exception e) {
            // Failures are reported on stderr only; callers are not notified.
            e.printStackTrace();
        }
    }

    /**
     * Extracts one column of a row-major matrix.
     *
     * @param expressiondata matrix indexed as {@code [row][column]}
     * @param sample         index of the column to extract
     * @return a new array holding {@code expressiondata[row][sample]} for every row, in row order
     */
    private double[] getSampleData(double[][] expressiondata, int sample) {
        final double[] column = new double[expressiondata.length];
        int row = 0;
        for (double[] rowValues : expressiondata) {
            column[row++] = rowValues[sample];
        }
        return column;
    }

    /**
     * Loads one SNP and returns its per-sample values in the order given by {@code indWGA}.
     *
     * <p>Entries of {@code indWGA} equal to -1 are skipped; every other entry is used as an index
     * into the SNP's genotype (or dosage) array, and the values are packed from the start of the
     * result array.
     *
     * <p>Without normalization the raw genotype bytes are returned. With normalization the values are
     * turned into z-scores {@code (value - mean) / stdev}: dosage values are used when the loader
     * reports dosage information, otherwise genotype calls are used and negative (missing) calls are
     * replaced by the mean before scaling.
     *
     * <p>The SNP's genotypes are cleared before returning. This method always returns an array.
     *
     * @param loader                loader used to read genotypes and, when available, dosages
     * @param numsamples            length of the returned array
     * @param indWGA                per-sample index into the genotype array, or -1 to skip
     * @param snpObj                SNP to load
     * @param normalizegenotypedata whether to z-score the values
     * @return array of length {@code numsamples}
     * @throws IOException if the loader fails to read the SNP
     */
    public double[] getSNPData(SNPLoader loader, int numsamples, int[] indWGA, SNP snpObj, boolean normalizegenotypedata) throws IOException {
        loader.loadGenotypes(snpObj);
        if (loader.hasDosageInformation()) {
            loader.loadDosage(snpObj);
        }
        final double[] values = new double[numsamples];

        if (!normalizegenotypedata) {
            // Raw genotype calls, no scaling.
            byte[] calls = snpObj.getGenotypes();
            int filled = 0;
            for (int genotypeIndex : indWGA) {
                if (genotypeIndex != -1) {
                    values[filled++] = calls[genotypeIndex];
                }
            }
        } else if (loader.hasDosageInformation()) {
            // z-score of the dosage values.
            double[] dosages = snpObj.getDosageValues();
            int filled = 0;
            for (int genotypeIndex : indWGA) {
                if (genotypeIndex != -1) {
                    values[filled++] = dosages[genotypeIndex];
                }
            }
            double mean = ArrayMath.mean(values);
            double stdev = ArrayMath.standardDeviation(values);
            for (int k = 0; k < values.length; ++k) {
                values[k] = (values[k] - mean) / stdev;
            }
        } else {
            // z-score of the genotype calls, with missing calls imputed to the mean.
            byte[] calls = snpObj.getGenotypes();
            int filled = 0;
            int missing = 0;
            for (int genotypeIndex : indWGA) {
                if (genotypeIndex == -1) {
                    continue;
                }
                if (calls[genotypeIndex] == -1) {
                    ++missing;
                }
                values[filled++] = calls[genotypeIndex];
            }
            // Sized for the non-missing calls. Only strictly positive values are copied; the
            // remaining slots keep their default 0.0.
            double[] observed = new double[numsamples - missing];
            int next = 0;
            for (int k = 0; k < values.length; ++k) {
                if (values[k] > 0.0) {
                    observed[next++] = values[k];
                }
            }
            double mean = ArrayMath.mean(observed);
            double stdev = ArrayMath.standardDeviation(observed);
            for (int k = 0; k < values.length; ++k) {
                double value = values[k] < 0.0 ? mean : values[k];
                values[k] = (value - mean) / stdev;
            }
        }

        snpObj.clearGenotypes();
        return values;
    }

    /**
     * Reads SNP names from a text file and resolves them to SNP ids of the genotype data.
     *
     * <p>Names are looked up in {@code ds.getGenotypeData().getSnpToSNPId()}. A name is skipped when
     * the lookup yields no value or yields -9 (treated here as the "not found" marker).
     *
     * @param prunedSNPListFile path of the file holding the SNP names
     * @param ds                dataset whose genotype data provides the name-to-id mapping
     * @return ids of the listed SNPs that were found, in file order
     * @throws IOException if the file cannot be read or closed
     */
    private ArrayList<Integer> loadPrunedSNPListFromFile(String prunedSNPListFile, TriTyperGeneticalGenomicsDataset ds) throws IOException {
        System.out.println("Loading list of pruned SNPs from text file: " + prunedSNPListFile);
        TextFile tf = new TextFile(prunedSNPListFile, false);
        String[] list;
        try {
            list = tf.readAsArray();
        } finally {
            // Release the file handle even when reading fails.
            tf.close();
        }
        ArrayList<Integer> ldSNPs = new ArrayList<Integer>();
        for (String s : list) {
            Integer snpId = ds.getGenotypeData().getSnpToSNPId().get((Object)s);
            // Guard against a missing mapping before unboxing for the sentinel comparison.
            if (snpId == null || snpId == -9) {
                continue;
            }
            ldSNPs.add(snpId);
        }
        System.out.println(ldSNPs.size() + " out of " + list.length + " SNPs in the pruned SNP list detected.");
        return ldSNPs;
    }

    /**
     * Selects SNPs on chromosomes 1 to 22 by walking each chromosome's position-sorted SNP list.
     *
     * <p>A SNP that has not been visited and passes the QC filter ({@code passesQC()},
     * {@code getMAF() > 0.05}, {@code getCR() > 0.95}, {@code getHWEP() > 1.0E-4}) is added to the
     * result. The following SNPs are then scanned in order and marked as visited until one is found
     * that also passes the filter and for which both {@code getRSquared} results are below their
     * thresholds (0.1 and 0.5); the outer loop resumes from there.
     *
     * <p>An {@link IOException} is caught and printed; the SNPs collected up to that point are
     * still returned.
     *
     * @param ds     dataset providing the genotype data and SNP objects
     * @param loader loader used to read genotypes (and dosages when available)
     * @return ids of the selected SNPs
     */
    private ArrayList<Integer> pruneSNPsByLDThreshold(TriTyperGeneticalGenomicsDataset ds, SNPLoader loader) {
        System.out.println("Pruning SNPs for LD");
        ArrayList<Integer> ldSNPs = new ArrayList<Integer>();
        DetermineLD ldcalc = new DetermineLD();
        try {
            for (int chr = 1; chr < 23; ++chr) {
                HashSet<Integer> snpsVisited = new HashSet<Integer>();
                ArrayList<Integer> snpsForChr = this.getSortedListOfSNPsForChr(chr, ds);
                // Incremented once per outer-loop iteration below, whether or not the SNP was kept.
                int numSNPsAfterPruning = 0;
                if (snpsForChr == null) {
                    System.out.println("No SNPs for Chr: " + chr);
                    continue;
                }
                int startsnpnum = 0;
                ProgressBar pb = new ProgressBar(snpsForChr.size());
                for (startsnpnum = 0; startsnpnum < snpsForChr.size(); ++startsnpnum) {
                    int snpID = snpsForChr.get(startsnpnum);
                    if (!snpsVisited.contains(snpID)) {
                        SNP snpObj = ds.getGenotypeData().getSNPObject(snpID);
                        loader.loadGenotypes(snpObj);
                        if (loader.hasDosageInformation()) {
                            loader.loadDosage(snpObj);
                        }
                        if (snpObj.passesQC() && snpObj.getMAF() > 0.05 && snpObj.getCR() > 0.95 && snpObj.getHWEP() > 1.0E-4) {
                            ldSNPs.add(snpID);
                            // Scan forward from the SNP that was just kept.
                            for (int querysnpnum = startsnpnum + 1; querysnpnum < snpsForChr.size(); ++querysnpnum) {
                                int snpID2 = snpsForChr.get(querysnpnum);
                                SNP snpObj2 = ds.getGenotypeData().getSNPObject(snpID2);
                                loader.loadGenotypes(snpObj2);
                                if (loader.hasDosageInformation()) {
                                    loader.loadDosage(snpObj2);
                                }
                                if (snpObj2.passesQC() && snpObj2.getMAF() > 0.05 && snpObj2.getCR() > 0.95 && snpObj2.getHWEP() > 1.0E-4) {
                                    // Same call twice with a different fourth argument (4 vs 5) - presumably
                                    // selecting r-squared and D' as the returned statistic; confirm against DetermineLD.
                                    double r2 = ldcalc.getRSquared(snpObj, snpObj2, ds.getGenotypeData(), 4, 1, false);
                                    double dp = ldcalc.getRSquared(snpObj, snpObj2, ds.getGenotypeData(), 5, 1, false);
                                    if (r2 < 0.1 && dp < 0.5) {
                                        snpObj2.clearGenotypes();
                                        // NOTE(review): the outer loop's ++startsnpnum runs after this break, so the
                                        // next start index is querysnpnum + 1 and this SNP is never itself examined
                                        // as a start SNP - confirm whether querysnpnum - 1 was intended.
                                        startsnpnum = querysnpnum;
                                        break;
                                    }
                                    snpObj2.clearGenotypes();
                                    snpsVisited.add(snpID2);
                                } else {
                                    snpObj2.clearGenotypes();
                                    snpsVisited.add(snpID2);
                                }
                                pb.iterate();
                            }
                        }
                        snpObj.clearGenotypes();
                    }
                    // snpID still refers to the SNP read at the top of this iteration, even if
                    // startsnpnum was moved forward inside the scan.
                    snpsVisited.add(snpID);
                    ++numSNPsAfterPruning;
                    pb.iterate();
                }
                pb.close();
                System.out.println(numSNPsAfterPruning + " SNPs left after pruning, out of " + snpsForChr.size() + "\t" + ldSNPs.size() + " total.");
            }
        }
        catch (IOException e) {
            // Reported only; the partial result is returned below.
            e.printStackTrace();
        }
        System.out.println(ldSNPs.size() + " pruned SNPs");
        return ldSNPs;
    }

    /**
     * Returns the ids of all SNPs on one chromosome that have a chromosome position above -1,
     * ordered by position.
     *
     * <p>After sorting, the order is verified by comparing every position with its predecessor. If
     * the check fails, the list is printed and the JVM is terminated.
     *
     * @param chr chromosome number to select
     * @param ds  dataset providing the genotype data (SNP names, chromosomes and positions)
     * @return position-sorted SNP ids; empty when the chromosome has no annotated SNPs
     */
    private ArrayList<Integer> getSortedListOfSNPsForChr(int chr, TriTyperGeneticalGenomicsDataset ds) {
        String[] snps = ds.getGenotypeData().getSNPs();

        // Collect the SNPs on this chromosome; only those with a known position can be sorted.
        ArrayList<SortableSNP> snpsSorted = new ArrayList<SortableSNP>();
        int numTotalOnChr = 0;
        for (int i = 0; i < snps.length; ++i) {
            byte snpchr = ds.getGenotypeData().getChr(i);
            if (snpchr != chr) {
                continue;
            }
            ++numTotalOnChr;
            int chrPos = ds.getGenotypeData().getChrPos(i);
            if (chrPos <= -1) {
                continue;
            }
            snpsSorted.add(new SortableSNP(null, i, (byte)chr, chrPos, SortableSNP.SORTBY.CHRPOS));
        }
        Collections.sort(snpsSorted);

        ArrayList<Integer> snpsOnChr = new ArrayList<Integer>(snpsSorted.size());
        for (SortableSNP s : snpsSorted) {
            snpsOnChr.add(s.id);
        }

        // Sanity check: each position must be >= the previous one. prevpos starts at -1, which is
        // below every retained position, and is advanced on every step so that neighbours are compared.
        int prevpos = -1;
        for (Integer snpId : snpsOnChr) {
            int chrPos = ds.getGenotypeData().getChrPos(snpId);
            if (chrPos < prevpos) {
                System.out.println("SNPs are not sorted!!");
                for (int j = 0; j < snpsOnChr.size(); ++j) {
                    Integer snpid = snpsOnChr.get(j);
                    System.out.println(j + "\t" + snpsOnChr.get(j) + "\t" + ds.getGenotypeData().getChrPos(snpid));
                }
                // NOTE(review): exits with status 0 although this is a failure path.
                System.exit(0);
            }
            prevpos = chrPos;
        }
        System.out.println("Chr " + chr + " has " + snpsOnChr.size() + " SNPs with annotation, out of " + numTotalOnChr);
        return snpsOnChr;
    }

    /**
     * Sliding-window SNP pruning based on PCA; as written this looks like unfinished experimental code.
     *
     * <p>For chromosomes 1 to 22 the position-sorted SNPs are processed in windows of 50 SNPs,
     * shifted by 5. For the first two SNPs of a full window, a correlation matrix of the other 49
     * SNPs is eigen-decomposed, PC scores are accumulated, and the squared correlations between the
     * SNP and those scores are summed and printed.
     *
     * <p>Behaviour to be aware of, all visible in the body:
     * <ul>
     *   <li>{@code System.exit(0)} is called right after the first full window has been processed,
     *       and again after the chromosome loop, so the {@code return} is not reached normally.</li>
     *   <li>Nothing is ever added to {@code ldSNPs} or {@code visitedSNPs}; the reported counts are
     *       therefore always 0.</li>
     *   <li>{@code vifthreshold}, {@code numwindows}, {@code window}, {@code eigenValues} and
     *       {@code vif} are computed or declared but never used.</li>
     * </ul>
     *
     * @param ds     dataset providing genotype data and the sample mapping
     * @param loader loader passed on to {@code getSNPData}
     * @return the (empty) list of selected SNP ids, if the method were to return
     * @throws IOException if reading SNP data fails
     */
    private ArrayList<Integer> pruneSNPsByMLRegressionPCA(TriTyperGeneticalGenomicsDataset ds, SNPLoader loader) throws IOException {
        ArrayList<Integer> ldSNPs = new ArrayList<Integer>();
        int numsamples = ds.getTotalGGSamples();
        int[] indWGA = ds.getExpressionToGenotypeIdArray();
        int windowsize = 50;
        int windowshift = 5;
        int vifthreshold = 2;
        // One row per SNP in the current window, one column per sample.
        double[][] snpdata = new double[windowsize][numsamples];
        int totalafterpruning = 0;
        for (int chr = 1; chr < 23; ++chr) {
            HashSet visitedSNPs = new HashSet();
            ArrayList<Integer> sortedSNPs = this.getSortedListOfSNPsForChr(chr, ds);
            if (sortedSNPs.size() < windowsize) {
                System.out.println("Chromosome " + chr + " has less than " + windowsize + " SNPs for pruning");
            } else {
                int numwindowsremainder = sortedSNPs.size() % windowsize;
                int numwindows = (sortedSNPs.size() - numwindowsremainder) / windowsize;
                System.out.println("Pruning SNPs for chromosome: " + chr);
                ProgressBar pb = new ProgressBar(sortedSNPs.size());
                boolean window = false;
                int startsnp = 0;
                while (startsnp + windowsize < sortedSNPs.size()) {
                    ArrayList<Integer> snpsInThisWindow = new ArrayList<Integer>();
                    int s = 0;
                    int currentsnp = startsnp;
                    boolean fullwindow = true;
                    // Fill the window with normalized data for the next windowsize usable SNPs.
                    while (s < windowsize) {
                        if (currentsnp == sortedSNPs.size()) {
                            fullwindow = false;
                            break;
                        }
                        Integer snpid = sortedSNPs.get(currentsnp);
                        SNP snpObj = ds.getGenotypeData().getSNPObject(snpid);
                        double[] snpData = this.getSNPData(loader, numsamples, indWGA, snpObj, true);
                        if (snpData == null) {
                            // A SNP without data also moves the window start forward by one.
                            ++startsnp;
                        } else {
                            snpdata[s] = snpData;
                            snpsInThisWindow.add(snpid);
                            ++s;
                        }
                        ++currentsnp;
                    }
                    if (fullwindow) {
                        int tmpwindow = windowsize - 1;
                        // Only the first two SNPs of the window are evaluated.
                        for (int snp1 = 0; snp1 < 2; ++snp1) {
                            double[][] pcscores = new double[tmpwindow][numsamples];
                            // Correlation matrix over the window's SNPs with row snp1 left out.
                            double[][] correlationmatrix = this.calculatecorrelationmatrix(snpdata, false, snp1);
                            EigenvalueDecomposition eig = PCA.eigenValueDecomposition(correlationmatrix);
                            double[] eigenValues = eig.getRealEigenvalues();
                            double[][] eigenvectors = new double[tmpwindow][tmpwindow];
                            for (int i = 0; i < tmpwindow; ++i) {
                                eigenvectors[i] = PCA.getEigenVector(eig, i);
                            }
                            for (int pc = 0; pc < tmpwindow; ++pc) {
                                for (int snp = 0; snp < tmpwindow; ++snp) {
                                    // NOTE(review): the sample loop is bounded by tmpwindow (49), not numsamples,
                                    // although each pcscores row has numsamples entries - confirm intent.
                                    for (int sample = 0; sample < tmpwindow; ++sample) {
                                        double probecoefficient = eigenvectors[pc][snp];
                                        // Indices at or beyond snp1 are shifted by one to step over the left-out SNP.
                                        if (snp >= snp1) {
                                            double[] dArray = pcscores[pc];
                                            int n = sample;
                                            dArray[n] = dArray[n] + snpdata[snp + 1][sample] * probecoefficient;
                                            continue;
                                        }
                                        double[] dArray = pcscores[pc];
                                        int n = sample;
                                        dArray[n] = dArray[n] + snpdata[snp][sample] * probecoefficient;
                                    }
                                }
                            }
                            // Sum of squared correlations between the left-out SNP and every PC score vector.
                            double sum = 0.0;
                            for (int snp2 = 0; snp2 < tmpwindow; ++snp2) {
                                double corr = ArrayMath.correlation((double[])snpdata[snp1], (double[])pcscores[snp2]);
                                sum += corr * corr;
                                System.out.println(snp1 + "\t" + snp2 + "\t" + corr + "\t" + corr * corr);
                            }
                            System.out.println(snp1 + "\t" + sum);
                            // Computed but not used afterwards.
                            double vif = 1.0 / Math.abs(1.0 - sum);
                        }
                        // Terminates the JVM after the first full window.
                        System.exit(0);
                    }
                    pb.set(startsnp);
                    startsnp += windowshift;
                }
                pb.close();
            }
            System.out.println("SNPs after pruning: " + visitedSNPs.size());
            totalafterpruning += visitedSNPs.size();
        }
        System.out.println(totalafterpruning);
        // Terminates the JVM; the return below only satisfies the compiler.
        System.exit(0);
        return ldSNPs;
    }

    /**
     * Computes the row-by-row correlation matrix of {@code data} without leaving any row out.
     *
     * @param data    matrix whose rows are correlated with each other
     * @param verbose whether progress is reported on the console
     * @return the result of the three-argument overload called with a {@code null} skip index
     */
    private double[][] calculatecorrelationmatrix(double[][] data, boolean verbose) {
        final Integer noSkippedRow = null;
        return this.calculatecorrelationmatrix(data, verbose, noSkippedRow);
    }

    /**
     * Computes the symmetric correlation matrix between the rows of {@code data}, optionally
     * leaving one row out.
     *
     * <p>When {@code skip} is given, the row with that index is omitted and the remaining rows keep
     * their relative order, so the result is one row and one column smaller. Diagonal entries are
     * set to 1.0.
     *
     * @param data    matrix whose rows are correlated with each other
     * @param verbose whether a progress bar is printed
     * @param skip    index of the row to leave out, or {@code null} to use all rows
     * @return square matrix of pairwise {@code ArrayMath.correlation} values
     */
    private double[][] calculatecorrelationmatrix(double[][] data, boolean verbose, Integer skip) {
        ProgressBar pb = null;
        if (verbose) {
            System.out.println("Calculating correlation matrix");
            pb = new ProgressBar(data.length);
            pb.print();
        }
        int numrows = data.length;

        // Map each row/column of the result to its source row, leaving out the skipped one. Working
        // through this mapping keeps cell (a, b) tied to the right pair of rows.
        int[] sourceRow = new int[numrows];
        int size = 0;
        for (int i = 0; i < numrows; ++i) {
            if (skip != null && i == skip) {
                continue;
            }
            sourceRow[size++] = i;
        }

        double[][] correlationmatrix = new double[size][size];
        for (int a = 0; a < size; ++a) {
            for (int b = a + 1; b < size; ++b) {
                double corr = ArrayMath.correlation(data[sourceRow[a]], data[sourceRow[b]]);
                correlationmatrix[a][b] = corr;
                correlationmatrix[b][a] = corr;
            }
            correlationmatrix[a][a] = 1.0;
            if (verbose) {
                pb.iterate();
            }
        }
        if (verbose) {
            pb.close();
        }
        return correlationmatrix;
    }
}

