/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.methylation;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.collections.primitives.ArrayDoubleList;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Correlation;
import umcg.genetica.math.stats.TTest;
import umcg.genetica.math.stats.ZScores;
import umcg.genetica.methylation.SoftfileAnnotation;

public class AssociatingPcasWithAnnotation {
    /** Pre-compiled tab delimiter shared by the readers of tab-separated files in this class. */
    private static final Pattern SPLIT_ON_TAB = Pattern.compile("\\t");

    /**
     * Command-line entry point. All inputs and outputs are hard-coded paths.
     *
     * <p>Steps: read the sample annotation, read the list of samples to exclude,
     * load the data matrix without those columns, group the annotated samples
     * by the {@code "Age"} annotation, and write the per-probe correlation
     * results (samples are treated as matrix columns).
     *
     * @param args ignored
     * @throws IOException if one of the input files cannot be read or the output cannot be written
     * @throws ClassNotFoundException declared by the matrix loader this method calls
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        String fileWithAnnotation = "/Data/Sasha/GPL96GPL570AgeSamplesWithRangesAveragedInfantsLeftOut.txt";
        String datafile = "/Data/GeneExpressionFinal/PCA/GPL570/GPL570ExpressiondataQNOnlyHumanSamplesOnlyENSGsCollapsed.binary";
        System.out.print("Read annotation file .... ");
        HashMap<String, SoftfileAnnotation> sampleAnnotation = AssociatingPcasWithAnnotation.readAnnotationFile(fileWithAnnotation);
        System.out.println("done");

        // Read the exclusion list and release the file handle even if reading fails.
        ArrayList<String> kickOutSamples;
        TextFile tf = new TextFile("/Data/GeneExpressionFinal/SampleAnnotation/GPL570/GPL570CellLineSamplesAsPerTextMiningAndCorrelationWithCellLineProfile.txt", false);
        try {
            kickOutSamples = new ArrayList<String>(tf.readAsArrayList());
        }
        finally {
            tf.close();
        }
        System.out.println(kickOutSamples.size() + " samples will be kicked out");

        System.out.print("Read data file .... ");
        DoubleMatrixDataset<String, String> data = AssociatingPcasWithAnnotation.readDoubleMatrixFileWithOutGivenColumns(datafile, kickOutSamples);
        System.out.println("done");

        String infoKey = "Age";
        HashMap<String, HashMap<String, String>> interestSets = AssociatingPcasWithAnnotation.selectSamplesWithInformationOfInterest(sampleAnnotation, infoKey, data, false);
        System.out.println("Number of interest sets: " + interestSets.size());
        AssociatingPcasWithAnnotation.correlateScoreAndItemOfInterest(data, interestSets, "/Data/Sasha/GenesCorrelatedWithAgeGPL570CellLinesExcludedLeaveOneOut.txt", false);
    }

    /**
     * Reads a tab-separated sample annotation file into a map keyed by the
     * value of the first column (used as the sample accession).
     *
     * <p>The first line is the header. Every further column of a row is stored
     * on the sample under its header name. The third column (index 2) is
     * overwritten with the series name looked up for the accession in a
     * hard-coded GSM-to-GSE mapping file; when no mapping exists,
     * {@code "problem"} is printed and a {@code null} series is stored.
     * The first header (from index 1 on) containing "mesh" is additionally
     * stored as the sample's mesh terms.
     *
     * <p>Rows are split keeping trailing empty columns, so empty cells at the
     * end of a row are stored as empty strings instead of shortening the row.
     * Blank lines are skipped, and columns without a header are ignored.
     *
     * <p>If reading the annotation file fails (including an empty file), the
     * message is printed and the JVM exits with status -1.
     *
     * @param fileWithAnnotation path of the tab-separated annotation file
     * @return annotation per sample accession
     * @throws IOException if the GSM-to-GSE mapping file cannot be read
     */
    private static HashMap<String, SoftfileAnnotation> readAnnotationFile(String fileWithAnnotation) throws IOException {
        Map<String, String> gsm2gse;
        TextFile mapping = new TextFile("/Data/GeneExpressionFinal/SampleAnnotation/GSMToGenericGSEName-GSE2109SplitPerTissue.txt", false);
        try {
            gsm2gse = mapping.readAsHashMap(0, 1);
        }
        finally {
            mapping.close();
        }

        HashMap<String, SoftfileAnnotation> sampleInfo = new HashMap<String, SoftfileAnnotation>();
        try {
            TextFile in = new TextFile(fileWithAnnotation, false);
            try {
                String str = in.readLine();
                if (str == null) {
                    throw new IOException("Annotation file is empty: " + fileWithAnnotation);
                }
                String[] headers = SPLIT_ON_TAB.split(str);
                int meshInfoIndex = -1;
                for (int i = 1; i < headers.length; ++i) {
                    if (!headers[i].toLowerCase().contains("mesh")) continue;
                    meshInfoIndex = i;
                    break;
                }
                while ((str = in.readLine()) != null) {
                    if (str.isEmpty()) continue;
                    // Limit -1 keeps trailing empty columns; the default split would drop
                    // them and make the fixed column indexes below run out of bounds.
                    String[] entries = SPLIT_ON_TAB.split(str, -1);
                    String gse = gsm2gse.get(entries[0]);
                    if (gse == null) {
                        System.out.println("problem");
                    }
                    if (entries.length > 2) {
                        entries[2] = gse;
                    }
                    SoftfileAnnotation tmp = new SoftfileAnnotation();
                    tmp.setAccession(entries[0]);
                    if (meshInfoIndex >= 0 && meshInfoIndex < entries.length) {
                        tmp.setMeshTerms(entries[meshInfoIndex]);
                    }
                    int nrColumns = Math.min(entries.length, headers.length);
                    for (int i = 1; i < nrColumns; ++i) {
                        tmp.putAnnotationInformation(headers[i], entries[i]);
                    }
                    sampleInfo.put(entries[0], tmp);
                }
            }
            finally {
                in.close();
            }
        }
        catch (IOException e) {
            System.out.println(e.getMessage());
            System.exit(-1);
        }
        return sampleInfo;
    }

    /**
     * Loads a complete matrix file by delegating to the two-argument overload
     * without a name filter.
     *
     * @param eigenVectorFile path of the matrix file
     * @return whatever the two-argument overload returns for a {@code null} filter
     */
    private static DoubleMatrixDataset<String, String> readDoubleMatrixFile(String eigenVectorFile) {
        final Set<String> noFilter = null;
        return readDoubleMatrixFile(eigenVectorFile, noFilter);
    }

    /**
     * Loads a matrix file, optionally passing a set of names to the dataset
     * constructor.
     *
     * <p>With a {@code null} set the single-argument constructor is used;
     * otherwise the set is handed over as the third constructor argument (the
     * second is {@code null}). An {@link IOException} is logged at SEVERE level
     * and not rethrown; in that case the default-constructed dataset is returned.
     *
     * @param eigenVectorFile path of the matrix file
     * @param rowsToInclude names passed to the dataset constructor, or {@code null} to load everything
     * @return the loaded dataset, or a default-constructed one if loading failed
     */
    private static DoubleMatrixDataset<String, String> readDoubleMatrixFile(String eigenVectorFile, Set<String> rowsToInclude) {
        // Fallback that is returned when loading throws.
        DoubleMatrixDataset loaded = new DoubleMatrixDataset();
        try {
            if (rowsToInclude != null) {
                loaded = new DoubleMatrixDataset(eigenVectorFile, null, rowsToInclude);
            } else {
                loaded = new DoubleMatrixDataset(eigenVectorFile);
            }
        }
        catch (IOException loadFailure) {
            Logger classLogger = Logger.getLogger(AssociatingPcasWithAnnotation.class.getName());
            classLogger.log(Level.SEVERE, null, loadFailure);
        }
        return loaded;
    }

    /**
     * Loads a matrix file while leaving out the given columns.
     *
     * <p>The column identifiers of the file are read first; the identifiers in
     * {@code columnsToExclude} are removed from that set and the remainder is
     * passed to the dataset constructor as its third argument.
     *
     * @param eigenVectorFile path of the matrix file
     * @param columnsToExclude column identifiers to drop; {@code null} or empty drops nothing
     * @return the dataset loaded with the remaining columns
     * @throws IOException if the file cannot be read
     * @throws ClassNotFoundException declared by the dataset API used here
     */
    private static DoubleMatrixDataset<String, String> readDoubleMatrixFileWithOutGivenColumns(String eigenVectorFile, ArrayList<String> columnsToExclude) throws IOException, ClassNotFoundException {
        List<Object> columnObjectsOnly = DoubleMatrixDataset.getColumnObjectsOnly(eigenVectorFile);
        HashSet<Object> colsToRetain = new HashSet<Object>(columnObjectsOnly);
        if (columnsToExclude != null) {
            // Remove one by one: HashSet.removeAll(list) falls back to list.contains()
            // per element when the set is not larger than the list, which is quadratic.
            for (String excluded : columnsToExclude) {
                colsToRetain.remove(excluded);
            }
        }
        DoubleMatrixDataset tmp = new DoubleMatrixDataset(eigenVectorFile, null, colsToRetain);
        return tmp;
    }

    /**
     * Groups annotated samples by their {@code "series id"} annotation and
     * keeps only the groups that are usable for the annotation {@code infoKey}.
     *
     * <p>A sample is kept when it has a non-empty value for {@code infoKey} and
     * its identifier occurs in the matrix (among the row identifiers when
     * {@code samplesOnRows} is true, otherwise among the column identifiers).
     * A group is kept when it holds at least 10 samples with at least 2
     * distinct values. All other samples are also removed from
     * {@code sampleAnnotation}, which is therefore modified by this call.
     *
     * <p>The JVM exits (status 0) when the first annotation entry lacks
     * {@code infoKey} or when no sample qualifies at all.
     *
     * @param sampleAnnotation annotation per sample; rejected samples are removed from it
     * @param infoKey annotation key whose value is collected per sample
     * @param eigenVectors matrix whose row or column identifiers define the known samples
     * @param samplesOnRows true when samples are the matrix rows, false when they are the columns
     * @return per series id, a map from sample identifier to its {@code infoKey} value
     */
    private static HashMap<String, HashMap<String, String>> selectSamplesWithInformationOfInterest(HashMap<String, SoftfileAnnotation> sampleAnnotation, String infoKey, DoubleMatrixDataset<String, String> eigenVectors, boolean samplesOnRows) {
        final int minSamplesPerSet = 10;
        final int minDistinctValuesPerSet = 2;

        HashMap<String, HashMap<String, String>> gseSets = new HashMap<String, HashMap<String, String>>();
        ArrayList<String> removeSamples = new ArrayList<String>();

        // Only the first entry is probed to detect a key that is not annotated at all.
        Iterator<Map.Entry<String, SoftfileAnnotation>> firstSample = sampleAnnotation.entrySet().iterator();
        if (firstSample.hasNext() && !firstSample.next().getValue().getAnnotationInformation().containsKey(infoKey)) {
            System.out.print("No " + infoKey + " information");
            System.exit(0);
        }

        // Hash the matrix identifiers once instead of scanning the list for every sample.
        Set<String> matrixSamples = new HashSet<String>(samplesOnRows ? eigenVectors.rowObjects : eigenVectors.colObjects);

        for (Map.Entry<String, SoftfileAnnotation> sample : sampleAnnotation.entrySet()) {
            String sampleId = sample.getKey();
            String value = sample.getValue().getAnnotationInformation().get(infoKey);
            // A missing value is treated like an empty one instead of failing with an NPE.
            if (value == null || value.isEmpty() || !matrixSamples.contains(sampleId)) {
                removeSamples.add(sampleId);
                continue;
            }
            String seriesId = sample.getValue().getAnnotationInformation().get("series id");
            HashMap<String, String> members = gseSets.get(seriesId);
            if (members == null) {
                members = new HashMap<String, String>();
                gseSets.put(seriesId, members);
            }
            members.put(sampleId, value);
        }

        if (gseSets.isEmpty()) {
            System.out.println("Unforeseen error check Key and code");
            System.exit(0);
        }

        List<String> removeGseSets = new ArrayList<String>();
        int numberOfInterestSets = 0;
        int numberOfInterestSamples = 0;
        for (Map.Entry<String, HashMap<String, String>> gse : gseSets.entrySet()) {
            HashMap<String, String> members = gse.getValue();
            Set<String> uniqueValues = new HashSet<String>(members.values());
            if (uniqueValues.size() >= minDistinctValuesPerSet && members.size() >= minSamplesPerSet) {
                ++numberOfInterestSets;
                numberOfInterestSamples += members.size();
                continue;
            }
            removeGseSets.add(gse.getKey());
            removeSamples.addAll(members.keySet());
        }
        System.out.println("Number of sets: " + numberOfInterestSets);
        System.out.println("Total samples of interest: " + numberOfInterestSamples);

        for (String removeEntry : removeGseSets) {
            gseSets.remove(removeEntry);
        }
        for (String removeEntry : removeSamples) {
            sampleAnnotation.remove(removeEntry);
        }
        return gseSets;
    }

    /**
     * Groups the annotated samples that occur among the matrix row identifiers
     * by their {@code "series id"} annotation.
     *
     * <p>Samples whose series id is missing or empty are removed from
     * {@code sampleAnnotation}. Samples that have a series id but are not
     * among the matrix rows are skipped and stay in {@code sampleAnnotation}.
     *
     * @param sampleAnnotation annotation per sample; samples without a series id are removed from it
     * @param eigenVectors matrix whose row identifiers define the known samples
     * @return per series id, a map from sample identifier to that same series id
     */
    private static HashMap<String, HashMap<String, String>> selectSamplesWithSeriesInformation(HashMap<String, SoftfileAnnotation> sampleAnnotation, DoubleMatrixDataset<String, String> eigenVectors) {
        HashMap<String, HashMap<String, String>> gseSets = new HashMap<String, HashMap<String, String>>();
        ArrayList<String> removeSamples = new ArrayList<String>();

        // Hash the row identifiers once instead of scanning the list for every sample.
        Set<String> matrixSamples = new HashSet<String>(eigenVectors.rowObjects);

        for (Map.Entry<String, SoftfileAnnotation> sample : sampleAnnotation.entrySet()) {
            String seriesId = sample.getValue().getAnnotationInformation().get("series id");
            // A missing series id is handled like an empty one instead of failing with an NPE.
            if (seriesId == null || seriesId.isEmpty()) {
                removeSamples.add(sample.getKey());
                continue;
            }
            if (!matrixSamples.contains(sample.getKey())) continue;
            HashMap<String, String> members = gseSets.get(seriesId);
            if (members == null) {
                members = new HashMap<String, String>();
                gseSets.put(seriesId, members);
            }
            members.put(sample.getKey(), seriesId);
        }
        for (String removeEntry : removeSamples) {
            sampleAnnotation.remove(removeEntry);
        }
        return gseSets;
    }

    /**
     * For every matrix column and every interest set, compares the values of
     * the samples labelled {@code entries.get(0)} with those labelled
     * {@code entries.get(1)} using {@code TTest.testZscore}.
     *
     * <p>Samples are looked up among the matrix row identifiers; if one is
     * missing, a diagnostic is printed and the JVM exits with status 0.
     * A comparison is only made when both groups hold more than two values.
     * The resulting scores are collected in a local map only; nothing is
     * returned or written by this method.
     *
     * @param doubleMatrix matrix with the samples on its rows
     * @param interestSets per set name, a map from sample identifier to its label
     * @param entries the two labels to compare, at positions 0 and 1
     */
    public static void associateScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, HashMap<String, HashMap<String, String>> interestSets, ArrayList<String> entries) {
        HashMap<String, Double> zScoreByColumnAndSet = new HashMap<String, Double>();
        HashMap<String, Integer> rowOfSample = new HashMap<String, Integer>();

        // Resolve the matrix row of every sample up front.
        for (HashMap<String, String> members : interestSets.values()) {
            for (String sampleId : members.keySet()) {
                int row = doubleMatrix.rowObjects.indexOf(sampleId);
                if (row >= 0) {
                    rowOfSample.put(sampleId, row);
                } else {
                    System.out.println("Potential mismatch between annotation and samples");
                    System.out.println(sampleId + " is not in value matrix");
                    System.out.println("\n However :" + rowOfSample.size() + " are in the matrix");
                    System.exit(0);
                }
            }
        }

        for (int column = 0; column < doubleMatrix.nrCols; ++column) {
            for (Map.Entry<String, HashMap<String, String>> interestSet : interestSets.entrySet()) {
                ArrayDoubleList firstGroup = new ArrayDoubleList();
                ArrayDoubleList secondGroup = new ArrayDoubleList();
                for (Map.Entry<String, String> member : interestSet.getValue().entrySet()) {
                    String label = member.getValue();
                    ArrayDoubleList target;
                    if (label.equals(entries.get(0))) {
                        target = firstGroup;
                    } else if (label.equals(entries.get(1))) {
                        target = secondGroup;
                    } else {
                        // Samples with any other label take no part in the comparison.
                        continue;
                    }
                    int row = rowOfSample.get(member.getKey());
                    target.add(doubleMatrix.rawData[row][column]);
                }
                double[] first = firstGroup.toArray(new double[0]);
                double[] second = secondGroup.toArray(new double[0]);
                if (first.length > 2 && second.length > 2) {
                    double zScore = TTest.testZscore(first, second);
                    zScoreByColumnAndSet.put(doubleMatrix.colObjects.get(column) + "_" + interestSet.getKey(), zScore);
                }
            }
        }
    }

    /**
     * Reads a tab-separated file and returns, for every probe, the value of
     * the second column of the line whose first column equals that probe.
     *
     * <p>The first line of the file is skipped. Lines with fewer than two
     * columns and lines for unknown probes are ignored. When a probe occurs
     * several times in {@code probes}, only its first position is filled; when
     * it occurs on several lines, the last line wins.
     *
     * @param filename path of the tab-separated file
     * @param probes probe identifiers; the result is aligned with this list
     * @return array of the same length as {@code probes}; positions without a matching line are {@code null}
     * @throws IOException if the file cannot be read
     */
    public static String[] readGeneNamesForProbes(String filename, List<String> probes) throws IOException {
        String[] geneNames = new String[probes.size()];

        // Index the probes once so every line needs a hash lookup instead of a list scan.
        // Only the first position of a duplicate is recorded, matching List.indexOf.
        Map<String, Integer> firstIndexOfProbe = new HashMap<String, Integer>();
        int position = 0;
        for (String probe : probes) {
            if (!firstIndexOfProbe.containsKey(probe)) {
                firstIndexOfProbe.put(probe, position);
            }
            ++position;
        }

        TextFile in = new TextFile(filename, false);
        try {
            in.readLine();
            String line;
            while ((line = in.readLine()) != null) {
                String[] split = SPLIT_ON_TAB.split(line);
                if (split.length < 2) continue;
                Integer indexOf = firstIndexOfProbe.get(split[0]);
                if (indexOf == null) continue;
                geneNames[indexOf] = split[1];
            }
        }
        finally {
            in.close();
        }
        return geneNames;
    }

    /**
     * Correlates every probe with the sample annotation value within each
     * interest set and writes the combined z-scores to {@code outfile}.
     *
     * <p>Per probe and set, the Spearman correlation between the probe values
     * and the annotation values is converted to a z-score with
     * {@code Correlation.convertCorrelationToZScore}. Annotation values that
     * are not numeric are encoded as 1.0 for "male" (case-insensitive) and 2.0
     * for anything else. For every probe the output holds one line per set
     * with the weighted z-score of all other sets (leave-one-out), followed by
     * one line with the weighted z-score over all sets (set column "-"). Each
     * line is: probe name, set name, z-score, p-value, separated by tabs.
     *
     * <p>The per-set z-scores are also saved as a matrix (probes by sets) to
     * the hard-coded path {@code /Data/Sasha/GeneGSEAgeCorrelationZScoresGPL570.txt}.
     * If a sample is missing from the matrix, a diagnostic is printed and the
     * JVM exits with status 0.
     *
     * @param doubleMatrix the data matrix
     * @param interestSets per set name, a map from sample identifier to its annotation value
     * @param outfile path of the tab-separated result file
     * @param samplesOnRows true when samples are the matrix rows (probes on columns), false for the opposite layout
     * @throws IOException if a result file cannot be written
     */
    public static void correlateScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, HashMap<String, HashMap<String, String>> interestSets, String outfile, boolean samplesOnRows) throws IOException {
        HashMap<String, Integer> indeces = new HashMap<String, Integer>();
        int largestSet = 0;
        for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
            for (Map.Entry<String, String> sample : set.getValue().entrySet()) {
                int index = samplesOnRows ? doubleMatrix.rowObjects.indexOf(sample.getKey()) : doubleMatrix.colObjects.indexOf(sample.getKey());
                if (index > -1) {
                    indeces.put(sample.getKey(), index);
                    continue;
                }
                System.out.println("Potential mismatch between annotation and samples");
                System.out.println(sample.getKey() + " is not in value matrix");
                System.out.println("\n However :" + indeces.size() + " are in the matrix");
                System.exit(0);
            }
            largestSet = Math.max(largestSet, set.getValue().size());
        }
        // Called before any conversion, with the largest set size; presumably it
        // prepares the correlation-to-z-score conversion -- TODO confirm.
        Correlation.correlationToZScore(largestSet);

        int nrProbes = samplesOnRows ? doubleMatrix.nrCols : doubleMatrix.nrRows;
        int nrSets = interestSets.size();

        // Set names and sizes do not depend on the probe, so collect them once.
        // The map is not modified below, so its iteration order stays the same.
        String[] setNames = new String[nrSets];
        int[] setSizes = new int[nrSets];
        int setIndex = 0;
        for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
            setNames[setIndex] = set.getKey();
            setSizes[setIndex] = set.getValue().size();
            ++setIndex;
        }

        double[][] probeGSEZ = new double[nrProbes][];
        SpearmansCorrelation sc = new SpearmansCorrelation();
        TextFile out = new TextFile(outfile, true);
        try {
            for (int i = 0; i < nrProbes; ++i) {
                // Probes live on the axis that does not hold the samples; take the label from there.
                String probeName = samplesOnRows ? (String)doubleMatrix.colObjects.get(i) : (String)doubleMatrix.rowObjects.get(i);
                double[] zScores = new double[nrSets];
                int index = 0;
                for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
                    int sizeOfGseSet = setSizes[index];
                    double[] setValues = new double[sizeOfGseSet];
                    double[] setAges = new double[sizeOfGseSet];
                    int k = 0;
                    for (Map.Entry<String, String> sample : set.getValue().entrySet()) {
                        int sampleIndex = indeces.get(sample.getKey());
                        setValues[k] = samplesOnRows ? doubleMatrix.rawData[sampleIndex][i] : doubleMatrix.rawData[i][sampleIndex];
                        try {
                            setAges[k] = Double.parseDouble(sample.getValue());
                        }
                        catch (NumberFormatException ex) {
                            // Non-numeric annotation: encode "male" as 1.0 and every other value as 2.0.
                            setAges[k] = "male".equals(sample.getValue().toLowerCase()) ? 1.0 : 2.0;
                        }
                        ++k;
                    }
                    double spearman = sc.correlation(setValues, setAges);
                    double zScore = Correlation.convertCorrelationToZScore(sizeOfGseSet, spearman);
                    zScores[index] = zScore;
                    ++index;
                    // Diagnostic output for one hard-coded probe.
                    if ("ENSG00000117525".equals(probeName)) {
                        System.out.println(zScore + " " + spearman + " " + ZScores.zToP(zScore));
                    }
                }
                probeGSEZ[i] = zScores;

                // Leave-one-out: combined z-score over all sets except the one named on the line.
                for (int leave = 0; leave < nrSets; ++leave) {
                    double[] zScoresLeft = new double[nrSets - 1];
                    int[] setSizesLeft = new int[nrSets - 1];
                    int zi = 0;
                    for (int j = 0; j < nrSets; ++j) {
                        if (j == leave) continue;
                        zScoresLeft[zi] = zScores[j];
                        setSizesLeft[zi] = setSizes[j];
                        ++zi;
                    }
                    double leftZ = ZScores.getWeightedZ(zScoresLeft, setSizesLeft);
                    out.writeln(probeName + "\t" + setNames[leave] + "\t" + leftZ + "\t" + ZScores.zToP(leftZ));
                }

                double metaZ = ZScores.getWeightedZ(zScores, setSizes);
                out.writeln(probeName + "\t-\t" + metaZ + "\t" + ZScores.zToP(metaZ));
            }
        }
        finally {
            out.close();
        }

        for (int i = 0; i < setNames.length; ++i) {
            System.out.println(setNames[i] + "\t" + setSizes[i]);
        }
        DoubleMatrixDataset probeGSEDataset = new DoubleMatrixDataset(probeGSEZ);
        // Rows of the saved matrix are probes, so label them from the probe axis.
        probeGSEDataset.rowObjects = samplesOnRows ? doubleMatrix.colObjects : doubleMatrix.rowObjects;
        probeGSEDataset.colObjects = Arrays.asList(setNames);
        probeGSEDataset.save("/Data/Sasha/GeneGSEAgeCorrelationZScoresGPL570.txt");
    }

    /**
     * Checks that every sample of every interest set occurs among the matrix
     * row identifiers, recording each sample's row index in a local map.
     *
     * <p>If a sample is missing, a diagnostic is printed and the JVM exits
     * with status 0. No statistic is computed here and nothing is returned.
     *
     * @param doubleMatrix matrix with the samples on its rows
     * @param interestSets per set name, a map keyed by sample identifier
     */
    public static void associateAnovaScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, HashMap<String, HashMap<String, String>> interestSets) {
        HashMap<String, Integer> rowOfSample = new HashMap<String, Integer>();
        for (HashMap<String, String> members : interestSets.values()) {
            for (String sampleId : members.keySet()) {
                int row = doubleMatrix.rowObjects.indexOf(sampleId);
                if (row >= 0) {
                    rowOfSample.put(sampleId, row);
                } else {
                    System.out.println("Potential mismatch between annotation and samples");
                    System.out.println(sampleId + " is not in value matrix");
                    System.out.println("\n However :" + rowOfSample.size() + " are in the matrix");
                    System.exit(0);
                }
            }
        }
    }
}

