/*
 * Decompiled with CFR 0.152.
 */
package eqtlmappingpipeline.metaqtl3;

import JSci.maths.ArrayMath;
import Jama.EigenvalueDecomposition;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import umcg.genetica.console.ProgressBar;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.trityper.EQTL;
import umcg.genetica.io.trityper.SNP;
import umcg.genetica.io.trityper.SNPLoader;
import umcg.genetica.io.trityper.TriTyperExpressionData;
import umcg.genetica.io.trityper.TriTyperGeneticalGenomicsDataset;
import umcg.genetica.io.trityper.eQTLTextFile;
import umcg.genetica.math.PCA;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Regression;

/**
 * Removes the linear effect of a given set of eQTLs (SNP-probe pairs) from the expression
 * data of one or more datasets.
 *
 * <p>For every probe that has eQTLs to regress out, the usable SNPs are collected per dataset.
 * A probe with a single usable SNP is corrected with a simple linear regression; a probe with
 * several usable SNPs is corrected by regressing out the principal components of the SNP
 * covariance matrix one after another. The residuals are rescaled and shifted back to the
 * probe's mean and then written into the matrix returned by
 * {@code getExpressionData().getMatrix()}.
 */
public class EQTLRegression {
    /** Datasets that are processed by the most recent {@code regressOutEQTLEffects} call. */
    TriTyperGeneticalGenomicsDataset[] gg;
    /** The eQTLs (SNP name + probe name) whose effects are regressed out. */
    EQTL[] eqtlsToRegressOut;

    /** One histogram bin per percent of explained variance, 0 through 100. */
    private static final int NR_EXPLAINED_VARIANCE_BINS = 101;
    /** SNP id that the code treats as "skip this SNP" (presumably the lookup's not-found value). */
    private static final int SNP_ID_ABSENT = -9;
    /** Principal components whose explained variance is below this fraction are not regressed out. */
    private static final double MIN_PC_EXPLAINED_VARIANCE = 0.01;
    /** Only SNPs whose r2 with a principal component exceeds this value are listed in the PCA report. */
    private static final double MIN_REPORTED_PC_SNP_R2 = 0.1;

    /**
     * Regresses out the eQTLs given as (SNP name, probe name) pairs.
     *
     * @param eqtls pairs whose left element is the SNP name and whose right element is the probe
     * @param gg    datasets to correct
     * @throws IOException if loading genotype data fails
     */
    public void regressOutEQTLEffects(ArrayList<Pair<String, String>> eqtls, TriTyperGeneticalGenomicsDataset[] gg) throws IOException {
        this.gg = gg;
        this.eqtlsToRegressOut = new EQTL[eqtls.size()];
        for (int q = 0; q < eqtls.size(); ++q) {
            Pair<String, String> snpProbePair = eqtls.get(q);
            EQTL eqtl = new EQTL();
            eqtl.setRsName(snpProbePair.getLeft());
            eqtl.setProbe(snpProbePair.getRight());
            this.eqtlsToRegressOut[q] = eqtl;
        }
        System.out.println("About to regress out: " + eqtls.size() + " QTLs from data.");
        this.regressOutEQTLEffects();
    }

    /**
     * Regresses out the given eQTLs.
     *
     * @param eqtls eQTLs to regress out; the array is stored, not copied
     * @param gg    datasets to correct
     * @throws IOException if loading genotype data fails
     */
    public void regressOutEQTLEffects(EQTL[] eqtls, TriTyperGeneticalGenomicsDataset[] gg) throws IOException {
        this.gg = gg;
        this.eqtlsToRegressOut = eqtls;
        System.out.println("About to regress out: " + eqtls.length + " QTLs from data.");
        this.regressOutEQTLEffects();
    }

    /**
     * Regresses out the eQTLs listed in an eQTL text file and optionally saves the corrected
     * expression matrix of every dataset next to its original expression file.
     *
     * @param regressOutEQTLEffectFileName path of the eQTL file to read
     * @param outputfiles                  when {@code true}, each corrected matrix is saved to
     *                                     {@code <expressionLocation>-EQTLEffectsRemoved.txt.gz}
     * @param gg                           datasets to correct
     * @throws IOException if reading the eQTL file, loading genotypes or saving fails
     */
    public void regressOutEQTLEffects(String regressOutEQTLEffectFileName, boolean outputfiles, TriTyperGeneticalGenomicsDataset[] gg) throws IOException {
        this.gg = gg;
        System.out.println("\n\n\nRemoving eQTL effects from the following eQTL file: '" + regressOutEQTLEffectFileName + "'");
        eQTLTextFile in = new eQTLTextFile(regressOutEQTLEffectFileName, false);
        try {
            this.eqtlsToRegressOut = in.read();
        } finally {
            // Release the file handle even when reading fails.
            in.close();
        }
        System.out.println("Number of eQTLs to regress out found in file:\t" + this.eqtlsToRegressOut.length);
        this.regressOutEQTLEffects();
        if (!outputfiles) {
            return;
        }
        for (int d = 0; d < gg.length; ++d) {
            TriTyperGeneticalGenomicsDataset ds = gg[d];
            TriTyperExpressionData dsexp = ds.getExpressionData();
            String outputFileName = ds.getSettings().expressionLocation + "-EQTLEffectsRemoved.txt.gz";
            DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>(dsexp.getMatrix(), Arrays.asList(dsexp.getProbes()), Arrays.asList(dsexp.getIndividuals()));
            dsout.recalculateHashMaps();
            System.out.println("Saving expression file after removal of eQTL effects: " + outputFileName);
            dsout.save(outputFileName);
        }
    }

    /**
     * Regresses {@link #eqtlsToRegressOut} out of every dataset in {@link #gg} and prints a
     * summary (number of corrected probes and a histogram of explained variance) to stdout.
     */
    private void regressOutEQTLEffects() throws IOException {
        HashMap<String, ArrayList<EQTL>> covariatesPerProbe = this.groupEQTLsByProbe();
        System.out.println("Removing eQTLs:");
        int nrDatasets = this.gg.length;
        int[] nrEQTLsRegressedOut = new int[nrDatasets];
        int[][] explainedVariancePerEQTLProbe = new int[nrDatasets][NR_EXPLAINED_VARIANCE_BINS];
        SNPLoader[] ggSNPLoaders = new SNPLoader[nrDatasets];
        try {
            boolean dosageInformationPresentForAllDatasets = true;
            for (int d = 0; d < nrDatasets; ++d) {
                ggSNPLoaders[d] = this.gg[d].getGenotypeData().createSNPLoader();
                if (!ggSNPLoaders[d].hasDosageInformation()) {
                    dosageInformationPresentForAllDatasets = false;
                }
            }
            for (int d = 0; d < nrDatasets; ++d) {
                nrEQTLsRegressedOut[d] = this.regressOutDataset(d, ggSNPLoaders[d], covariatesPerProbe, dosageInformationPresentForAllDatasets, explainedVariancePerEQTLProbe[d]);
            }
        } finally {
            // Close every loader that was opened, also when processing was aborted by an exception.
            for (int d = 0; d < nrDatasets; ++d) {
                if (ggSNPLoaders[d] != null) {
                    ggSNPLoaders[d].close();
                    ggSNPLoaders[d] = null;
                }
            }
        }
        this.printSummary(nrEQTLsRegressedOut, explainedVariancePerEQTLProbe);
    }

    /**
     * Groups {@link #eqtlsToRegressOut} by probe name, preserving the input order per probe, and
     * reports how many probes have more than one eQTL.
     */
    private HashMap<String, ArrayList<EQTL>> groupEQTLsByProbe() {
        HashMap<String, ArrayList<EQTL>> covariatesPerProbe = new HashMap<String, ArrayList<EQTL>>();
        int nrProbesWithMultipleCovariates = 0;
        for (int v = 0; v < this.eqtlsToRegressOut.length; ++v) {
            EQTL current = this.eqtlsToRegressOut[v];
            String probe = current.getProbe();
            ArrayList<EQTL> eqtlsForProbe = covariatesPerProbe.get(probe);
            if (eqtlsForProbe == null) {
                eqtlsForProbe = new ArrayList<EQTL>();
                covariatesPerProbe.put(probe, eqtlsForProbe);
            }
            eqtlsForProbe.add(current);
            // Count each probe once, at the moment it gets its second eQTL, so the reported
            // number really is a number of probes rather than a number of extra eQTLs.
            if (eqtlsForProbe.size() == 2) {
                ++nrProbesWithMultipleCovariates;
            }
        }
        if (nrProbesWithMultipleCovariates > 0) {
            System.out.println("There are:\t" + nrProbesWithMultipleCovariates + "\tprobes for which we want to regress out multiple SNPs. This will be conducted through multiple regression employing PCA.");
        }
        return covariatesPerProbe;
    }

    /**
     * Corrects all probes of dataset {@code d} that have eQTLs to regress out.
     *
     * @param d                                      index of the dataset in {@link #gg}
     * @param loader                                 SNP loader of that dataset
     * @param covariatesPerProbe                     eQTLs to regress out, keyed by probe name
     * @param dosageInformationPresentForAllDatasets whether every dataset's loader reported dosage data
     * @param explainedVarianceHistogram             histogram (one bin per percent) updated per corrected probe
     * @return the number of probes that were corrected in this dataset
     */
    private int regressOutDataset(int d, SNPLoader loader, HashMap<String, ArrayList<EQTL>> covariatesPerProbe, boolean dosageInformationPresentForAllDatasets, int[] explainedVarianceHistogram) throws IOException {
        TriTyperGeneticalGenomicsDataset currentDataset = this.gg[d];
        HashSet<EQTL> eqtlsRegressedOutByMultipleRegression = new HashSet<EQTL>();
        // Remembers per SNP id whether it was usable, so rejected SNPs are not loaded again.
        HashMap<Integer, Boolean> snpPassesQC = new HashMap<Integer, Boolean>();
        String[] probes = currentDataset.getExpressionData().getProbes();
        System.out.print("Dataset:\t" + currentDataset.getSettings().name);
        ProgressBar pgb = new ProgressBar(probes.length);
        int nrProbesCorrected = 0;
        for (int p = 0; p < probes.length; ++p) {
            ArrayList<EQTL> covariatesForThisProbe = covariatesPerProbe.get(probes[p]);
            if (covariatesForThisProbe != null) {
                ArrayList<EQTL> eventualListOfEQTLs = new ArrayList<EQTL>();
                ArrayList<SNP> snpsForProbe = new ArrayList<SNP>();
                ArrayList<double[]> xs = new ArrayList<double[]>();
                ArrayList<Double> meanxs = new ArrayList<Double>();
                for (EQTL e : covariatesForThisProbe) {
                    if (eqtlsRegressedOutByMultipleRegression.contains(e)) {
                        continue;
                    }
                    Integer snpId = Integer.valueOf(currentDataset.getGenotypeData().getSnpToSNPId().get(e.getRsName()));
                    if (snpId.intValue() == SNP_ID_ABSENT) {
                        continue;
                    }
                    if (Boolean.FALSE.equals(snpPassesQC.get(snpId))) {
                        continue;
                    }
                    SNP currentSNP = currentDataset.getGenotypeData().getSNPObject(snpId);
                    loader.loadGenotypes(currentSNP);
                    if (loader.hasDosageInformation()) {
                        loader.loadDosage(currentSNP);
                    }
                    if (!currentSNP.passesQC()) {
                        snpPassesQC.put(snpId, Boolean.FALSE);
                        currentSNP.clearGenotypes();
                        continue;
                    }
                    double[] x = currentSNP.selectGenotypes(currentDataset.getExpressionToGenotypeIdArray());
                    double meanX = ArrayMath.mean(x);
                    double varianceX = ArrayMath.variance(x);
                    if (varianceX == 0.0) {
                        // A SNP without variation cannot explain anything; drop it and free its
                        // genotypes just like a SNP that fails QC.
                        snpPassesQC.put(snpId, Boolean.FALSE);
                        currentSNP.clearGenotypes();
                        continue;
                    }
                    for (int i = 0; i < x.length; ++i) {
                        x[i] -= meanX;
                    }
                    eventualListOfEQTLs.add(e);
                    snpsForProbe.add(currentSNP);
                    xs.add(x);
                    meanxs.add(meanX);
                    snpPassesQC.put(snpId, Boolean.TRUE);
                }
                if (eventualListOfEQTLs.size() == 1) {
                    this.regressOutSingleSNP(currentDataset, p, snpsForProbe.get(0), xs.get(0), meanxs.get(0).doubleValue(), explainedVarianceHistogram);
                    ++nrProbesCorrected;
                } else if (eventualListOfEQTLs.size() > 1) {
                    if (!dosageInformationPresentForAllDatasets) {
                        // NOTE(review): terminating the JVM from here is kept for backward
                        // compatibility with existing callers.
                        System.err.println("Multiple linear regression is not supported for datasets that do not have dosage information.");
                        System.exit(-1);
                    } else {
                        eqtlsRegressedOutByMultipleRegression.addAll(eventualListOfEQTLs);
                        this.regressOutMultipleSNPs(currentDataset, probes[p], p, snpsForProbe, xs, explainedVarianceHistogram);
                        ++nrProbesCorrected;
                    }
                }
                for (SNP snp : snpsForProbe) {
                    snp.clearGenotypes();
                }
            }
            pgb.iterate();
        }
        pgb.print();
        pgb.close();
        System.out.println("");
        return nrProbesCorrected;
    }

    /**
     * Regresses a single SNP out of probe {@code p} and writes the rescaled residuals back into
     * the expression matrix.
     *
     * @param dataset   dataset that owns the probe
     * @param p         row index of the probe in the expression matrix
     * @param snp       SNP with genotypes loaded
     * @param x         mean-centered genotype values as returned by {@code selectGenotypes}
     * @param meanX     mean of the genotype values before centering
     * @param histogram explained-variance histogram to update
     */
    private void regressOutSingleSNP(TriTyperGeneticalGenomicsDataset dataset, int p, SNP snp, double[] x, double meanX, int[] histogram) throws IOException {
        int[] expressionToGenotypeId = dataset.getExpressionToGenotypeIdArray();
        double[][] rawData = dataset.getExpressionData().getMatrix();
        int totalGGSamples = dataset.getTotalGGSamples();
        // When x has one value per sample, x and the expression row line up index by index.
        boolean allSamplesHaveGenotype = x.length == totalGGSamples;
        double[] y = new double[x.length];
        double meanY;
        double varianceY;
        if (allSamplesHaveGenotype) {
            meanY = dataset.getExpressionData().getProbeMean()[p];
            varianceY = dataset.getExpressionData().getProbeVariance()[p];
            for (int s = 0; s < totalGGSamples; ++s) {
                y[s] = rawData[p][s] - meanY;
            }
        } else {
            // Keep only the expression values of samples that have a linked, non-missing (not -1)
            // genotype and are flagged as included.
            byte[] genotypes = snp.getGenotypes();
            int nrSelected = 0;
            for (int s = 0; s < rawData[p].length; ++s) {
                int genotypeId = expressionToGenotypeId[s];
                if (genotypeId == -1 || genotypes[genotypeId] == -1 || !dataset.getGenotypeData().getIsIncluded()[genotypeId].booleanValue()) {
                    continue;
                }
                y[nrSelected] = rawData[p][s];
                ++nrSelected;
            }
            meanY = ArrayMath.mean(y);
            varianceY = ArrayMath.variance(y);
            for (int i = 0; i < y.length; ++i) {
                y[i] -= meanY;
            }
        }
        double[] rc = Regression.getLinearRegressionCoefficients(x, y);
        double correlation = ArrayMath.correlation(x, y);
        double propExplainedVarianceTrait = correlation * correlation - 1.0 / (double) y.length;
        if (propExplainedVarianceTrait < 0.0) {
            propExplainedVarianceTrait = 0.0;
        }
        histogram[EQTLRegression.toHistogramBin(propExplainedVarianceTrait)]++;
        double[] rawDataUpdated = new double[totalGGSamples];
        if (allSamplesHaveGenotype) {
            for (int s = 0; s < totalGGSamples; ++s) {
                rawDataUpdated[s] = y[s] - x[s] * rc[0];
            }
        } else {
            byte[] genotypes = snp.getGenotypes();
            for (int s = 0; s < totalGGSamples; ++s) {
                int ind = expressionToGenotypeId[s];
                if (ind == -1) {
                    continue;
                }
                double valX = genotypes[ind];
                // A missing genotype (-1) contributes no correction; otherwise center like x.
                valX = valX == -1.0 ? 0.0 : valX - meanX;
                rawDataUpdated[s] = rawData[p][s] - valX * rc[0];
            }
        }
        double meanUpdated = ArrayMath.mean(rawDataUpdated);
        double stdDevRatio = ArrayMath.standardDeviation(rawDataUpdated) / Math.sqrt(varianceY);
        // Center the residuals, divide by the residual/original standard deviation ratio and
        // shift back to the probe mean.
        for (int s = 0; s < totalGGSamples; ++s) {
            rawDataUpdated[s] -= meanUpdated;
            rawDataUpdated[s] /= stdDevRatio;
            rawDataUpdated[s] += meanY;
        }
        System.arraycopy(rawDataUpdated, 0, rawData[p], 0, totalGGSamples);
    }

    /**
     * Regresses several SNPs out of probe {@code p} by regressing out the principal components
     * of the SNP covariance matrix one after another, then writes the rescaled residuals back
     * into the expression matrix.
     *
     * @param dataset      dataset that owns the probe
     * @param probeName    name of the probe, used in log output only
     * @param p            row index of the probe in the expression matrix
     * @param snpsForProbe SNPs being regressed out, parallel to {@code xs}
     * @param xs           mean-centered genotype values per SNP
     * @param histogram    explained-variance histogram to update
     */
    private void regressOutMultipleSNPs(TriTyperGeneticalGenomicsDataset dataset, String probeName, int p, ArrayList<SNP> snpsForProbe, ArrayList<double[]> xs, int[] histogram) throws IOException {
        int nrSNPs = snpsForProbe.size();
        int totalGGSamples = dataset.getTotalGGSamples();
        double[][] dataMatrix = new double[nrSNPs][];
        for (int snp = 0; snp < nrSNPs; ++snp) {
            dataMatrix[snp] = xs.get(snp);
        }

        // Covariance matrix of the centered SNP values (symmetric, so fill both halves at once).
        double[][] covarianceMatrix = new double[nrSNPs][nrSNPs];
        double sampleCountMinusOne = totalGGSamples - 1;
        for (int f = 0; f < nrSNPs; ++f) {
            for (int g = f; g < nrSNPs; ++g) {
                double crossProduct = 0.0;
                for (int h = 0; h < totalGGSamples; ++h) {
                    crossProduct += dataMatrix[f][h] * dataMatrix[g][h];
                }
                double covariance = crossProduct / sampleCountMinusOne;
                covarianceMatrix[f][g] = covariance;
                covarianceMatrix[g][f] = covariance;
            }
        }
        EigenvalueDecomposition eig = PCA.eigenValueDecomposition(covarianceMatrix);
        double[][] eigenVectors = new double[nrSNPs][];
        for (int pca = 0; pca < nrSNPs; ++pca) {
            eigenVectors[pca] = PCA.getEigenVector(eig, pca);
        }

        // Project every sample onto every principal component.
        double[][] dataMatrixPCScores = new double[nrSNPs][totalGGSamples];
        for (int pca = 0; pca < nrSNPs; ++pca) {
            for (int sample = 0; sample < totalGGSamples; ++sample) {
                for (int snp = 0; snp < nrSNPs; ++snp) {
                    dataMatrixPCScores[pca][sample] += dataMatrix[snp][sample] * eigenVectors[pca][snp];
                }
            }
        }

        TriTyperExpressionData expressionData = dataset.getExpressionData();
        double[][] rawData = expressionData.getMatrix();
        double meanYOriginal = expressionData.getProbeMean()[p];
        double varianceYOriginal = expressionData.getProbeVariance()[p];
        double[] y = new double[totalGGSamples];
        System.arraycopy(rawData[p], 0, y, 0, totalGGSamples);

        // Components that explain (almost) no genetic variation are left in the data.
        double[] eigenValues = eig.getRealEigenvalues();
        boolean[] regressOutPCA = new boolean[nrSNPs];
        boolean atLeastOnePCANotRegressedOut = false;
        for (int pca = 0; pca < nrSNPs; ++pca) {
            regressOutPCA[pca] = !(PCA.getEigenValueVar(eigenValues, pca) < MIN_PC_EXPLAINED_VARIANCE);
            if (!regressOutPCA[pca]) {
                atLeastOnePCANotRegressedOut = true;
            }
        }
        if (atLeastOnePCANotRegressedOut) {
            this.reportPrincipalComponents(probeName, snpsForProbe, dataMatrix, dataMatrixPCScores, eigenValues, y);
        }

        // Explained variance is summed over the components, measured on the uncorrected trait.
        double propExplainedVarianceTrait = 0.0;
        for (int pca = 0; pca < nrSNPs; ++pca) {
            if (!regressOutPCA[pca]) {
                continue;
            }
            double correlation = ArrayMath.correlation(dataMatrixPCScores[pca], y);
            propExplainedVarianceTrait += correlation * correlation - 1.0 / (double) y.length;
        }
        if (propExplainedVarianceTrait < 0.0) {
            propExplainedVarianceTrait = 0.0;
        }
        histogram[EQTLRegression.toHistogramBin(propExplainedVarianceTrait)]++;

        // Regress the selected components out one after another; each step works on the
        // residuals of the previous one.
        for (int pca = 0; pca < nrSNPs; ++pca) {
            if (!regressOutPCA[pca]) {
                continue;
            }
            double[] x = dataMatrixPCScores[pca];
            double[] rc = Regression.getLinearRegressionCoefficients(x, y);
            for (int s = 0; s < totalGGSamples; ++s) {
                y[s] -= x[s] * rc[0];
            }
        }

        double meanYUpdated = ArrayMath.mean(y);
        double varianceYUpdated = ArrayMath.variance(y);
        double stdDevRatio = Math.sqrt(varianceYUpdated) / Math.sqrt(varianceYOriginal);
        for (int s = 0; s < totalGGSamples; ++s) {
            y[s] -= meanYUpdated;
            y[s] /= stdDevRatio;
            y[s] += meanYOriginal;
        }
        for (int s = 0; s < totalGGSamples; ++s) {
            if (Double.isNaN(y[s])) {
                System.out.println("Error!:\t" + probeName + "\t" + dataset.getSettings().name + "\t" + s + "\t" + meanYUpdated + "\t" + stdDevRatio + "\t" + meanYOriginal);
            }
            rawData[p][s] = y[s];
        }
    }

    /**
     * Prints, for every principal component of a probe, its explained variance, eigenvalue,
     * r2 with the trait and the SNPs whose r2 with the component exceeds
     * {@link #MIN_REPORTED_PC_SNP_R2}.
     */
    private void reportPrincipalComponents(String probeName, ArrayList<SNP> snpsForProbe, double[][] dataMatrix, double[][] dataMatrixPCScores, double[] eigenValues, double[] y) throws IOException {
        int nrSNPs = snpsForProbe.size();
        System.out.println("There is at least one PCA that has not been regressed out as it does not explain a lot of genetic variation!:");
        for (int pca = 0; pca < nrSNPs; ++pca) {
            double[] scores = dataMatrixPCScores[pca];
            double correlation = ArrayMath.correlation(scores, y);
            double r2 = correlation * correlation;
            int pcaNr = pca + 1;
            StringBuilder snpsStronglyCorrelatedWithPCA = new StringBuilder();
            for (int snp = 0; snp < nrSNPs; ++snp) {
                double correlationPCASNP = Math.abs(ArrayMath.correlation(scores, dataMatrix[snp]));
                double r2PCASNP = correlationPCASNP * correlationPCASNP;
                if (r2PCASNP > MIN_REPORTED_PC_SNP_R2) {
                    snpsStronglyCorrelatedWithPCA.append("\t").append(snpsForProbe.get(snp).getName()).append(", ").append(r2PCASNP);
                }
            }
            System.out.println(probeName + "\tPCA" + pcaNr + "\tExplainedVariance:\t" + PCA.getEigenValueVar(eigenValues, pca) + "\tEigenvalue:\t" + eigenValues[eigenValues.length - 1 - pca] + "\tPCATraitR2:\t" + r2 + "\tSNPsStronglyCorrelatedWithPCA:\t" + snpsStronglyCorrelatedWithPCA);
        }
        System.out.println("");
    }

    /**
     * Maps a proportion of explained variance to its percent bin, clamped to 0..100 so that a
     * value marginally outside [0, 1] can never index outside the histogram. NaN maps to bin 0.
     */
    private static int toHistogramBin(double proportion) {
        long bin = Math.round(proportion * 100.0);
        return (int) Math.max(0L, Math.min((long) (NR_EXPLAINED_VARIANCE_BINS - 1), bin));
    }

    /** Prints the number of corrected probes and the explained-variance histogram per dataset. */
    private void printSummary(int[] nrEQTLsRegressedOut, int[][] explainedVariancePerEQTLProbe) {
        System.out.println("\n");
        System.out.println("eQTLs regressed per dataset:");
        for (int d = 0; d < this.gg.length; ++d) {
            System.out.println(this.gg[d].getSettings().name + "\t" + nrEQTLsRegressedOut[d]);
        }
        System.out.println("\n");
        System.out.println("Proportion explained variance of genotypic variation on eQTLs per dataset:");
        StringBuilder header = new StringBuilder("r2");
        for (TriTyperGeneticalGenomicsDataset dataset : this.gg) {
            header.append("\t").append(dataset.getSettings().name);
        }
        System.out.println(header);
        for (int bin = 0; bin < NR_EXPLAINED_VARIANCE_BINS; ++bin) {
            StringBuilder row = new StringBuilder();
            row.append((double) bin / 100.0);
            for (int d = 0; d < this.gg.length; ++d) {
                row.append("\t").append(explainedVariancePerEQTLProbe[d][bin]);
            }
            System.out.println(row);
        }
    }
}

