/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.methylation;

import JSci.maths.ArrayMath;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.collections.primitives.ArrayDoubleList;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
import org.apache.commons.math3.stat.inference.OneWayAnova;
import umcg.genetica.containers.Pair;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.math.stats.Correlation;
import umcg.genetica.math.stats.Heterogeneity;
import umcg.genetica.math.stats.TTest;
import umcg.genetica.math.stats.ZScores;
import umcg.genetica.methylation.SoftfileAnnotation;

public class MethylationAssociatoingAnnotationWithValues {
    private static Pattern SPLIT_ON_TAB = Pattern.compile("\\t");
    private static Pattern SPLIT_PARTS = Pattern.compile("-");

    public static void main(String[] args) {
        String fileWithAnnotation = "D:\\UMCG\\Methylation_GPL8490\\TCGA+GEO_14112012\\Annotation_AllSamples.txt";
        String dataFile = "D:\\UMCG\\Methylation_GPL8490\\TCGA+GEO_14112012\\methylation_Matrix_SexFiltered.QuantileNormalized.txt";
        System.out.print("Read annotation file .... ");
        HashMap<String, SoftfileAnnotation> sampleAnnotation = MethylationAssociatoingAnnotationWithValues.readAnnotationFile(fileWithAnnotation);
        System.out.println("done");
        System.out.print("Read eigenvector file .... ");
        DoubleMatrixDataset<String, String> eigenVectors = MethylationAssociatoingAnnotationWithValues.readDoubleMatrixFile(dataFile);
        System.out.println("done");
        ArrayList<String> setSelection = new ArrayList<String>();
        setSelection.addAll(Arrays.asList("GSE20236", "GSE23638", "GSE19711", "GSE20067", "GSE15745 // GSE36194", "GSE15745", "GSE41037", "GSE32393", "GSE31979", "GSE20242", "GSE20080", "GSE36194", "GSE22595", "GSE29661", "GSE21232", "GSE30653 // GSE30654", "GSE27097", "GSE37988", "GSE32861 // GSE32867", "GSE17448", "GSE33422", "GSE25033", "GSE34035", "GSE28746", "GSE32396"));
        String infoKey = "Age";
        String nameSeriesInfoColumn = "series id";
        eigenVectors = eigenVectors.getTransposedDataset();
        LinkedHashMap<String, HashMap<String, String>> interestSets = MethylationAssociatoingAnnotationWithValues.selectSamplesWithInformationOfInterest(sampleAnnotation, nameSeriesInfoColumn, infoKey, eigenVectors, 25);
        if (setSelection.size() > 0) {
            ArrayList<String> removeEntry = new ArrayList<String>();
            for (Map.Entry<String, HashMap<String, String>> e : interestSets.entrySet()) {
                if (setSelection.contains(e.getKey())) continue;
                removeEntry.add(e.getKey());
            }
            for (String s : removeEntry) {
                interestSets.remove(s);
            }
        }
        for (Map.Entry<String, HashMap<String, String>> tmp : interestSets.entrySet()) {
            System.out.println(tmp.getKey() + "\t" + tmp.getValue().size());
        }
        System.out.println("Number of interest sets: " + interestSets.size());
        MethylationAssociatoingAnnotationWithValues.correlateScoreAndItemOfInterest(eigenVectors, interestSets);
    }

    private static HashMap<String, SoftfileAnnotation> readAnnotationFile(String fileWithAnnotation) {
        HashMap<String, SoftfileAnnotation> sampleInfo = new HashMap<String, SoftfileAnnotation>();
        try {
            BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(new File(fileWithAnnotation)), "ISO-8859-1"), 8096);
            String str = in.readLine();
            String[] headers = SPLIT_ON_TAB.split(str);
            int meshInfoIndex = -1;
            for (int i = 1; i < headers.length; ++i) {
                if (!headers[i].toLowerCase().contains("mesh")) continue;
                meshInfoIndex = i;
                break;
            }
            while ((str = in.readLine()) != null) {
                String[] entries = SPLIT_ON_TAB.split(str);
                SoftfileAnnotation tmp = new SoftfileAnnotation();
                tmp.setAccession(entries[0]);
                if (meshInfoIndex >= 0) {
                    tmp.setMeshTerms(entries[meshInfoIndex]);
                }
                for (int i = 1; i < entries.length; ++i) {
                    tmp.putAnnotationInformation(headers[i], entries[i]);
                }
                sampleInfo.put(entries[0], tmp);
            }
            in.close();
        }
        catch (IOException e) {
            System.out.println(e.getMessage());
            System.exit(-1);
        }
        return sampleInfo;
    }

    private static DoubleMatrixDataset<String, String> readDoubleMatrixFile(String eigenVectorFile) {
        DoubleMatrixDataset<String, String> tmp = new DoubleMatrixDataset<String, String>();
        try {
            tmp = new DoubleMatrixDataset(eigenVectorFile, "\t");
        }
        catch (IOException ex) {
            Logger.getLogger(MethylationAssociatoingAnnotationWithValues.class.getName()).log(Level.SEVERE, null, ex);
        }
        return tmp;
    }

    private static LinkedHashMap<String, HashMap<String, String>> selectSamplesWithInformationOfInterest(HashMap<String, SoftfileAnnotation> sampleAnnotation, String nameSeriesInfoColumn, String infoKey, DoubleMatrixDataset<String, String> eigenVectors, int minimalNumberSamplesInSeries) {
        Map.Entry<String, SoftfileAnnotation> tmp;
        LinkedHashMap<String, HashMap<String, String>> gseSets = new LinkedHashMap<String, HashMap<String, String>>();
        ArrayList<String> removeSamples = new ArrayList<String>();
        Iterator<Map.Entry<String, SoftfileAnnotation>> iterator = sampleAnnotation.entrySet().iterator();
        if (iterator.hasNext() && !(tmp = iterator.next()).getValue().getAnnotationInformation().containsKey(infoKey)) {
            System.out.print("No " + infoKey + " information");
            System.exit(0);
        }
        for (String sampleName : eigenVectors.rowObjects) {
            if (sampleAnnotation.containsKey(sampleName)) {
                SoftfileAnnotation sampleAnnot = sampleAnnotation.get(sampleName);
                if (!sampleAnnot.getAnnotationInformation().get(infoKey).isEmpty() || !sampleAnnot.getAnnotationInformation().get(infoKey).equals("")) {
                    String seriesId = sampleAnnot.getAnnotationInformation().get(nameSeriesInfoColumn);
                    if (gseSets.containsKey(seriesId)) {
                        System.out.println(sampleName + "\t" + sampleAnnot.getAnnotationInformation().get(infoKey));
                        ((HashMap)gseSets.get(seriesId)).put(sampleName, sampleAnnot.getAnnotationInformation().get(infoKey));
                        continue;
                    }
                    HashMap<String, String> tmp2 = new HashMap<String, String>();
                    tmp2.put(sampleName, sampleAnnot.getAnnotationInformation().get(infoKey));
                    gseSets.put(seriesId, tmp2);
                    continue;
                }
                System.out.println("No age info: " + sampleName);
                removeSamples.add(sampleName);
                continue;
            }
            if (sampleName.startsWith("TCGA-")) {
                String[] sampleIdParts = SPLIT_PARTS.split(sampleName);
                String newSampleName = sampleIdParts[0] + "-" + sampleIdParts[1] + "-" + sampleIdParts[2];
                if (sampleAnnotation.containsKey(newSampleName)) {
                    SoftfileAnnotation sampleAnnot = sampleAnnotation.get(newSampleName);
                    if (!sampleAnnot.getAnnotationInformation().get(infoKey).isEmpty() || !sampleAnnot.getAnnotationInformation().get(infoKey).equals("")) {
                        String seriesId = sampleAnnot.getAnnotationInformation().get(nameSeriesInfoColumn);
                        if (gseSets.containsKey(seriesId)) {
                            System.out.println(sampleName + "\t" + sampleAnnot.getAnnotationInformation().get(infoKey));
                            gseSets.get(seriesId).put(sampleName, sampleAnnot.getAnnotationInformation().get(infoKey));
                            continue;
                        }
                        HashMap<String, String> tmp3 = new HashMap<String, String>();
                        tmp3.put(sampleName, sampleAnnot.getAnnotationInformation().get(infoKey));
                        gseSets.put(seriesId, tmp3);
                        continue;
                    }
                    System.out.println("No age info: " + sampleName);
                    removeSamples.add(sampleName);
                    continue;
                }
                System.out.println("Not in matrix: " + sampleName);
                removeSamples.add(sampleName);
                continue;
            }
            System.out.println("Not in matrix: " + sampleName);
            removeSamples.add(sampleName);
        }
        ArrayList removeGseSets = new ArrayList();
        int numberOfInterestSets = 0;
        int numberOfInterestSamples = 0;
        if (gseSets.size() > 0) {
            for (Map.Entry gse : gseSets.entrySet()) {
                ArrayList uniqueValues = new ArrayList();
                for (Map.Entry sample : ((HashMap)gse.getValue()).entrySet()) {
                    if (uniqueValues.contains(sample.getValue())) continue;
                    uniqueValues.add(sample.getValue());
                }
                if (uniqueValues.size() > 1 && ((HashMap)gse.getValue()).size() >= minimalNumberSamplesInSeries) {
                    ++numberOfInterestSets;
                    for (Map.Entry sample : ((HashMap)gse.getValue()).entrySet()) {
                        ++numberOfInterestSamples;
                    }
                    continue;
                }
                removeGseSets.add(gse.getKey());
                for (Map.Entry sample : ((HashMap)gse.getValue()).entrySet()) {
                    removeSamples.add((String)sample.getKey());
                }
            }
        } else {
            System.out.println("Unforeseen error check Key and code");
            System.exit(0);
        }
        System.out.println("Number of sets: " + numberOfInterestSets);
        System.out.println("Total samples of interest: " + numberOfInterestSamples);
        for (String removeEntry : removeGseSets) {
            gseSets.remove(removeEntry);
        }
        for (String removeEntry : removeSamples) {
            sampleAnnotation.remove(removeEntry);
        }
        return gseSets;
    }

    private static HashMap<String, HashMap<String, String>> selectSamplesWithSeriesInformation(HashMap<String, SoftfileAnnotation> sampleAnnotation, DoubleMatrixDataset<String, String> eigenVectors) {
        HashMap<String, HashMap<String, String>> gseSets = new HashMap<String, HashMap<String, String>>();
        ArrayList<String> removeSamples = new ArrayList<String>();
        for (Map.Entry<String, SoftfileAnnotation> sample : sampleAnnotation.entrySet()) {
            if (!sample.getValue().getAnnotationInformation().get("series id").isEmpty() || !sample.getValue().getAnnotationInformation().get("series id").equals("")) {
                if (!eigenVectors.rowObjects.contains(sample.getKey())) continue;
                String seriesId = sample.getValue().getAnnotationInformation().get("series id");
                if (gseSets.containsKey(seriesId)) {
                    gseSets.get(seriesId).put(sample.getKey(), sample.getValue().getAnnotationInformation().get("series id"));
                    continue;
                }
                HashMap<String, String> tmp = new HashMap<String, String>();
                tmp.put(sample.getKey(), sample.getValue().getAnnotationInformation().get("series id"));
                gseSets.put(seriesId, tmp);
                continue;
            }
            removeSamples.add(sample.getKey());
        }
        for (String removeEntry : removeSamples) {
            sampleAnnotation.remove(removeEntry);
        }
        return gseSets;
    }

    private static void associateTTestScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, HashMap<String, HashMap<String, String>> interestSets, ArrayList<String> entries) {
        HashMap<String, Double> scorePerGse = new HashMap<String, Double>();
        HashMap<String, Integer> indeces = new HashMap<String, Integer>();
        for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
            for (Map.Entry<String, String> sample : set.getValue().entrySet()) {
                if (doubleMatrix.rowObjects.contains(sample.getKey())) {
                    int index = doubleMatrix.rowObjects.indexOf(sample.getKey());
                    indeces.put(sample.getKey(), index);
                    continue;
                }
                System.out.println("Potential mismatch between annotation and samples");
                System.out.println(sample.getKey() + " is not in value matrix");
                System.out.println("\n However :" + indeces.size() + " are in the matrix");
                System.exit(0);
            }
        }
        for (int i = 0; i < doubleMatrix.nrCols; ++i) {
            for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
                ArrayDoubleList valueSet1 = new ArrayDoubleList();
                ArrayDoubleList valueSet2 = new ArrayDoubleList();
                for (Map.Entry<String, String> sample : set.getValue().entrySet()) {
                    if (sample.getValue().equals(entries.get(0))) {
                        valueSet1.add(doubleMatrix.rawData[(Integer)indeces.get(sample.getKey())][i]);
                        continue;
                    }
                    if (!sample.getValue().equals(entries.get(1))) continue;
                    valueSet2.add(doubleMatrix.rawData[(Integer)indeces.get(sample.getKey())][i]);
                }
                double[] set1 = valueSet1.toArray(new double[0]);
                double[] set2 = valueSet2.toArray(new double[0]);
                if (set1.length <= 2 || set2.length <= 2) continue;
                double zScore = TTest.testZscore(set1, set2);
                scorePerGse.put((String)doubleMatrix.colObjects.get(i) + "_" + set.getKey(), zScore);
            }
        }
    }

    private static void correlateScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, LinkedHashMap<String, HashMap<String, String>> interestSets) {
        HashMap<String, Double> scorePerGse = new HashMap<String, Double>();
        HashMap<String, Integer> indeces = new HashMap<String, Integer>();
        int largestSet = 0;
        for (Map.Entry<String, HashMap<String, String>> entry : interestSets.entrySet()) {
            for (Map.Entry<String, String> sample : entry.getValue().entrySet()) {
                if (doubleMatrix.rowObjects.contains(sample.getKey())) {
                    int index = doubleMatrix.rowObjects.indexOf(sample.getKey());
                    indeces.put(sample.getKey(), index);
                    continue;
                }
                System.out.println("Potential mismatch between annotation and samples");
                System.out.println(sample.getKey() + " is not in value matrix");
                System.out.println("\n However :" + indeces.size() + " are in the matrix");
                System.exit(0);
            }
            if (largestSet >= entry.getValue().size()) continue;
            largestSet = entry.getValue().size();
        }
        Correlation.correlationToZScore(largestSet);
        double[] metaZ = new double[doubleMatrix.nrCols];
        System.out.println("Z-scores");
        System.out.print("\tMeta Z\tpValue\tHeterogeneity\tHeterogeneity pValue");
        for (String t : interestSets.keySet()) {
            System.out.print("\t" + t);
        }
        System.out.print("\t");
        for (String t : interestSets.keySet()) {
            System.out.print("\t" + t);
        }
        System.out.println("");
        SpearmansCorrelation spearmansCorrelation = new SpearmansCorrelation();
        for (int i = 0; i < doubleMatrix.nrCols; ++i) {
            double[] zScores = new double[interestSets.size()];
            double[] correlations = new double[interestSets.size()];
            int[] setSizes = new int[zScores.length];
            int index = 0;
            for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
                int sizeOfGseSet;
                setSizes[index] = sizeOfGseSet = set.getValue().size();
                ArrayDoubleList valueSet = new ArrayDoubleList();
                ArrayDoubleList ageSet = new ArrayDoubleList();
                for (Map.Entry<String, String> sample : set.getValue().entrySet()) {
                    valueSet.add(doubleMatrix.rawData[(Integer)indeces.get(sample.getKey())][i]);
                    try {
                        ageSet.add(Double.parseDouble(sample.getValue()));
                    }
                    catch (NumberFormatException ex) {
                        ageSet.add("male".equals(sample.getValue().toLowerCase()) ? 1.0 : 2.0);
                    }
                }
                double[] setValues = valueSet.toArray(new double[0]);
                double[] setAges = ageSet.toArray(new double[0]);
                if (setValues.length > 2) {
                    double zScore;
                    double spearman = spearmansCorrelation.correlation(setValues, setAges);
                    zScores[index] = zScore = Correlation.convertCorrelationToZScore(sizeOfGseSet, spearman);
                    correlations[index] = spearman;
                    scorePerGse.put((String)doubleMatrix.colObjects.get(i) + "_" + set.getKey(), zScore);
                } else {
                    zScores[index] = Double.NaN;
                }
                ++index;
            }
            double zSum = 0.0;
            double sampleSizeSum = 0.0;
            for (int j = 0; j < zScores.length; ++j) {
                if (Double.isNaN(zScores[j])) continue;
                zSum += Math.sqrt(setSizes[j]) * zScores[j];
                sampleSizeSum += (double)setSizes[j];
            }
            double p = ZScores.zToP(zSum /= Math.sqrt(sampleSizeSum));
            Pair<Double, Double> hg = Heterogeneity.getISq(zScores, setSizes);
            System.out.print((String)doubleMatrix.colObjects.get(i) + "\t" + zSum + "\t" + p + "\t" + hg.getLeft() + "\t" + hg.getRight());
            for (double z : zScores) {
                System.out.print("\t" + z);
            }
            System.out.print("\t");
            for (double r : correlations) {
                System.out.print("\t" + r);
            }
            System.out.println("");
            metaZ[i] = zSum;
        }
        System.out.println("");
        for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
            for (Map.Entry<String, String> e : set.getValue().entrySet()) {
                Integer sampleIndex = (Integer)indeces.get(e.getKey());
                double correlation = ArrayMath.correlation((double[])doubleMatrix.rawData[sampleIndex], (double[])metaZ);
                System.out.println(e.getKey() + "\t" + e.getValue() + "\t" + correlation);
            }
        }
    }

    private static void associateAnovaScoreAndItemOfInterest(DoubleMatrixDataset<String, String> doubleMatrix, HashMap<String, HashMap<String, String>> interestSets) {
        HashMap<String, Integer> indeces = new HashMap<String, Integer>();
        for (Map.Entry<String, HashMap<String, String>> set : interestSets.entrySet()) {
            for (Map.Entry<String, String> entry : set.getValue().entrySet()) {
                if (doubleMatrix.rowObjects.contains(entry.getKey())) {
                    int index = doubleMatrix.rowObjects.indexOf(entry.getKey());
                    indeces.put(entry.getKey(), index);
                    continue;
                }
                System.out.println("Potential mismatch between annotation and samples");
                System.out.println(entry.getKey() + " is not in value matrix");
                System.out.println("\n However :" + indeces.size() + " are in the matrix");
                System.exit(0);
            }
        }
        for (int i = 0; i < doubleMatrix.nrCols; ++i) {
            ArrayList<double[]> valueSets = new ArrayList<double[]>();
            for (Map.Entry<String, Object> entry : interestSets.entrySet()) {
                ArrayDoubleList valueSet = new ArrayDoubleList();
                for (Map.Entry sample : ((HashMap)entry.getValue()).entrySet()) {
                    valueSet.add(doubleMatrix.rawData[(Integer)indeces.get(sample.getKey())][i]);
                }
                if (valueSet.size() <= 2) continue;
                valueSets.add(valueSet.toArray(new double[0]));
            }
            OneWayAnova anova = new OneWayAnova();
            double d = -1.0;
            try {
                d = anova.anovaPValue(valueSets);
            }
            catch (IllegalArgumentException ex) {
                Logger.getLogger(MethylationAssociatoingAnnotationWithValues.class.getName()).log(Level.SEVERE, null, ex);
            }
            System.out.println("Component: " + (String)doubleMatrix.colObjects.get(i) + " capable of discriminating between the sets, with p-value: " + d);
        }
    }

    private static LinkedHashMap<String, HashMap<String, String>> splitInterstingSetInPortions(LinkedHashMap<String, HashMap<String, String>> interestSets, int maxSize) {
        LinkedHashMap<String, HashMap<String, String>> newInterestingSets = new LinkedHashMap<String, HashMap<String, String>>();
        for (Map.Entry<String, HashMap<String, String>> series : interestSets.entrySet()) {
            if (series.getValue().size() <= maxSize) continue;
            System.out.println(series.getValue().size() % maxSize);
        }
        return newInterestingSets;
    }
}

