/*
 * Decompiled with CFR 0.152.
 */
package eqtlmappingpipeline.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import umcg.genetica.io.gmt.GMTFile;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix.DoubleMatrixDataset;
import umcg.genetica.text.Strings;

/**
 * Collapses the rows (probes) of a probe-by-sample expression matrix into
 * per-gene or per-pathway averages and writes the result to disk.
 *
 * <p>NOTE(review): {@code DoubleMatrixDataset} is used as a raw type below,
 * exactly as in the decompiled original; its declaration is not visible from
 * this file, so confirm whether it takes type parameters before tightening it.
 */
public class ExpressionFileMerger {
    /** Row label under which probes without a probe-to-gene mapping are pooled. */
    private static final String UNMAPPED_GENE = "-";

    /** A pathway is only written out when at least this many probes map to it. */
    private static final int MIN_PROBES_PER_PATHWAY = 4;

    /**
     * Placeholder: the body is empty, so calling this method has no effect.
     *
     * @param file1   unused
     * @param file2   unused
     * @param outfile unused
     */
    public void merge(String file1, String file2, String outfile) {
    }

    /**
     * Averages all probes that map to the same gene and saves the collapsed matrix.
     *
     * <p>Probes that are absent from the mapping file are pooled under the gene
     * name {@code "-"}. Two outputs are produced: the collapsed matrix at
     * {@code outfilename + ".gz"} and a two-column table at
     * {@code outfilename + "-NrProbesPerGene.txt"} listing how many probes were
     * averaged for each gene.
     *
     * @param in                  path of the expression matrix to load
     * @param probeToGeneNameFile mapping file; column 0 is used as key and column 1 as value
     * @param outfilename         prefix of both output files
     * @throws IOException if reading or writing any of the files fails
     */
    public void collapseOnGeneName(String in, String probeToGeneNameFile, String outfilename) throws IOException {
        HashMap<String, String> probeToGene = readProbeToGeneMap(probeToGeneNameFile);

        DoubleMatrixDataset d = new DoubleMatrixDataset(in);
        String[] probeNames = d.rowObjects.toArray(new String[0]);
        String[] sampleNames = d.colObjects.toArray(new String[0]);

        // Resolve the gene of every probe once instead of on every pass over the data.
        String[] geneOfProbe = new String[probeNames.length];
        HashSet<String> genesAvailable = new HashSet<String>();
        for (int probe = 0; probe < probeNames.length; ++probe) {
            String gene = probeToGene.get(probeNames[probe]);
            if (gene == null) {
                gene = UNMAPPED_GENE;
            }
            geneOfProbe[probe] = gene;
            genesAvailable.add(gene);
        }
        System.out.println("Total genes in expression file: " + genesAvailable.size());

        // Output row order follows the iteration order of the gene set.
        HashMap<String, Integer> geneToRow = new HashMap<String, Integer>();
        String[] geneNames = new String[genesAvailable.size()];
        int nrGenes = 0;
        for (String gene : genesAvailable) {
            geneToRow.put(gene, nrGenes);
            geneNames[nrGenes] = gene;
            ++nrGenes;
        }

        // Sum the probes of each gene; per cell the probes are added in ascending probe order.
        int[] nrProbesPerGene = new int[nrGenes];
        double[][] collapsedData = new double[nrGenes][sampleNames.length];
        double[][] rawData = d.rawData;
        for (int probe = 0; probe < probeNames.length; ++probe) {
            int row = geneToRow.get(geneOfProbe[probe]);
            ++nrProbesPerGene[row];
            double[] sums = collapsedData[row];
            double[] values = rawData[probe];
            for (int sample = 0; sample < sampleNames.length; ++sample) {
                sums[sample] += values[sample];
            }
        }

        // Turn the sums into means. Every gene has at least one probe, so the divisor is never 0.
        for (int row = 0; row < nrGenes; ++row) {
            double[] sums = collapsedData[row];
            double nrProbes = nrProbesPerGene[row];
            for (int sample = 0; sample < sampleNames.length; ++sample) {
                sums[sample] = sums[sample] / nrProbes;
            }
        }

        DoubleMatrixDataset dout = new DoubleMatrixDataset();
        dout.colObjects = Arrays.asList(sampleNames);
        dout.rowObjects = Arrays.asList(geneNames);
        dout.rawData = collapsedData;
        dout.save(outfilename + ".gz");

        TextFile out = new TextFile(outfilename + "-NrProbesPerGene.txt", true);
        try {
            out.writeln("Gene\tNrProbes");
            for (int row = 0; row < nrGenes; ++row) {
                out.writeln(geneNames[row] + "\t" + nrProbesPerGene[row]);
            }
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            out.close();
        }
    }

    /**
     * Collapses an expression matrix to one row per pathway, each row being the
     * mean over all probes whose Ensembl annotation is a member of that pathway.
     *
     * <p>Only pathways with at least {@value #MIN_PROBES_PER_PATHWAY} matching
     * probes are kept. Per kept pathway a summary line is printed and written to
     * {@code infile + "-NrGenesUsedForCollapse-" + pathwayname + ".txt"}. The
     * collapsed matrix is saved as
     * {@code infile + "-PathWayCollapsed-" + pathwayname [+ "-SDNorm"] + ".txt.gz"}.
     *
     * @param infile            path of the expression matrix to load
     * @param ensemblannot      tab-separated file; column 0 is a probe id, column 4 its Ensembl annotation
     * @param probeannot        tab-separated file with a header line; column 0 is a probe id
     * @param pathwayfile       pathway definition file passed to {@code GMTFile}
     * @param annotcol          column of {@code probeannot} holding the probe name used in the matrix
     * @param pathwayname       label inserted into the output file names
     * @param standardnormalize when true, {@code standardNormalizeData()} is applied before averaging
     * @throws IOException if reading or writing any of the files fails
     */
    public void collapseProbesBasedOnPathwayAnnotation(String infile, String ensemblannot, String probeannot, String pathwayfile, int annotcol, String pathwayname, boolean standardnormalize) throws IOException {
        HashMap<String, String> probeNrToHT12 = readProbeAnnotation(probeannot, annotcol);
        HashMap<String, String> probeToEns = readEnsemblAnnotation(ensemblannot, probeNrToHT12);

        GMTFile gmt = new GMTFile(pathwayfile);
        // Return type of getPathways() is not visible here; narrowed once instead of casting per element.
        @SuppressWarnings({"unchecked", "rawtypes"})
        ArrayList<String> pathways = (ArrayList) gmt.getPathways();

        DoubleMatrixDataset ds = new DoubleMatrixDataset(infile);
        String[] probeNames = ds.rowObjects.toArray(new String[0]);

        // The annotation of a probe does not depend on the pathway, so look it up once.
        String[] ensemblPerProbe = new String[probeNames.length];
        for (int probe = 0; probe < probeNames.length; ++probe) {
            ensemblPerProbe[probe] = probeToEns.get(probeNames[probe]);
        }

        // First pass: keep only the pathways that are covered by enough probes.
        ArrayList<String> selectedPathways = new ArrayList<String>();
        for (String pathway : pathways) {
            ProbeSelection selection = selectProbes(ensemblPerProbe, genesForPathway(gmt, pathway));
            if (selection.probesToMerge.size() >= MIN_PROBES_PER_PATHWAY) {
                selectedPathways.add(pathway);
            }
        }

        if (standardnormalize) {
            ds.standardNormalizeData();
        }

        String[] newProbeNames = selectedPathways.toArray(new String[0]);
        int nrSamples = ds.colObjects.size();
        double[][] newData = new double[newProbeNames.length][nrSamples];

        // Second pass: report and average the probes of every selected pathway.
        TextFile nrGenesUsed = new TextFile(infile + "-NrGenesUsedForCollapse-" + pathwayname + ".txt", true);
        try {
            double[][] originalData = ds.rawData;
            for (int pw = 0; pw < newProbeNames.length; ++pw) {
                String pathway = newProbeNames[pw];
                ProbeSelection selection = selectProbes(ensemblPerProbe, genesForPathway(gmt, pathway));
                int nrCollapsed = selection.probesToMerge.size();

                System.out.println(selection.nonAnnotated + " probes don't have an ensembl annotation, " + nrCollapsed + " will be collapsed");
                System.out.println("Final size: " + selection.finalSize);
                nrGenesUsed.writeln(pathway + "\t" + selection.nonAnnotated + " probes don't have an ensembl annotation, " + nrCollapsed + " will be collapsed. Final size: " + selection.finalSize);

                // Per sample the probes are summed in ascending probe order, then averaged.
                double[] merged = newData[pw];
                for (Integer probe : selection.probesToMerge) {
                    double[] values = originalData[probe];
                    for (int col = 0; col < nrSamples; ++col) {
                        merged[col] += values[col];
                    }
                }
                for (int col = 0; col < nrSamples; ++col) {
                    merged[col] = merged[col] / (double) nrCollapsed;
                }
            }
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            nrGenesUsed.close();
        }

        String outfile = infile + "-PathWayCollapsed-" + pathwayname;
        if (standardnormalize) {
            outfile = outfile + "-SDNorm";
        }
        outfile = outfile + ".txt.gz";
        System.out.println("Outfile: " + outfile);
        ds.rawData = newData;
        ds.rowObjects = Arrays.asList(newProbeNames);
        ds.save(outfile);
    }

    /** Loads column 0 to column 1 of the mapping file and always closes the file. */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static HashMap<String, String> readProbeToGeneMap(String probeToGeneNameFile) throws IOException {
        TextFile pm = new TextFile(probeToGeneNameFile, false);
        try {
            // Return type of readAsHashMap is not visible here; the original cast is kept.
            return (HashMap) pm.readAsHashMap(0, 1);
        } finally {
            pm.close();
        }
    }

    /**
     * Reads the probe annotation file into a map from column 0 to column
     * {@code annotcol}. The first line is treated as a header; entries whose
     * annotation is at most one character long are skipped, and lines that are
     * too short are reported on standard error.
     */
    private static HashMap<String, String> readProbeAnnotation(String probeannot, int annotcol) throws IOException {
        HashMap<String, String> probeNrToHT12 = new HashMap<String, String>();
        TextFile ptf = new TextFile(probeannot, false);
        try {
            String[] header = ptf.readLineElems(TextFile.tab);
            System.out.println("Assuming annotation " + header[annotcol]);
            String[] elems;
            while ((elems = ptf.readLineElems(TextFile.tab)) != null) {
                if (elems.length > annotcol) {
                    String ht12probe = elems[annotcol];
                    String probeId = elems[0];
                    if (ht12probe.length() > 1) {
                        probeNrToHT12.put(probeId, ht12probe);
                    }
                } else {
                    System.err.println("WARNING: probe annotation file does not contain all elements expected for line: ");
                    System.err.println(Strings.concat(elems, Strings.tab));
                }
            }
        } finally {
            ptf.close();
        }
        return probeNrToHT12;
    }

    /**
     * Reads the Ensembl annotation file and returns a map from the matrix probe
     * name (looked up through {@code probeNrToHT12}) to the trimmed value of
     * column 4. Lines with fewer than five columns or an unknown probe id are ignored.
     */
    private static HashMap<String, String> readEnsemblAnnotation(String ensemblannot, HashMap<String, String> probeNrToHT12) throws IOException {
        HashMap<String, String> probeToEns = new HashMap<String, String>();
        TextFile etf = new TextFile(ensemblannot, false);
        try {
            String[] elems;
            while ((elems = etf.readLineElems(TextFile.tab)) != null) {
                if (elems.length < 5) {
                    continue;
                }
                String ht12probe = probeNrToHT12.get(elems[0].trim());
                if (ht12probe != null) {
                    probeToEns.put(ht12probe, elems[4].trim());
                }
            }
        } finally {
            etf.close();
        }
        return probeToEns;
    }

    /** Fetches the gene set of one pathway, narrowing the library's return value once. */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static HashSet<String> genesForPathway(GMTFile gmt, String pathway) throws IOException {
        return (HashSet) gmt.getGenesForPathway(pathway);
    }

    /** Classifies every probe as unannotated, inside the pathway, or outside the pathway. */
    private static ProbeSelection selectProbes(String[] ensemblPerProbe, HashSet<String> genesInPathway) {
        int nonAnnotated = 0;
        int finalSize = 1; // one slot for the merged pathway row, plus every annotated probe outside it
        ArrayList<Integer> probesToMerge = new ArrayList<Integer>();
        for (int probe = 0; probe < ensemblPerProbe.length; ++probe) {
            String ensembl = ensemblPerProbe[probe];
            if (ensembl == null) {
                ++nonAnnotated;
            } else if (genesInPathway.contains(ensembl)) {
                probesToMerge.add(probe);
            } else {
                ++finalSize;
            }
        }
        return new ProbeSelection(nonAnnotated, finalSize, probesToMerge);
    }

    /** Result of matching the probes of a dataset against one pathway. */
    private static final class ProbeSelection {
        /** Number of probes without an Ensembl annotation. */
        final int nonAnnotated;
        /** One plus the number of annotated probes that are not in the pathway. */
        final int finalSize;
        /** Row indices of the probes whose annotation is in the pathway, ascending. */
        final ArrayList<Integer> probesToMerge;

        ProbeSelection(int nonAnnotated, int finalSize, ArrayList<Integer> probesToMerge) {
            this.nonAnnotated = nonAnnotated;
            this.finalSize = finalSize;
            this.probesToMerge = probesToMerge;
        }
    }
}

