/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.methylation;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix.DoubleMatrixDataset;

/**
 * Parsers that read a folder of per-sample, tab-separated TCGA methylation
 * text files and assemble them into probe-by-sample
 * {@link DoubleMatrixDataset} matrices.
 *
 * <p>Every parser uses the first {@code .txt} file in the folder to establish
 * the probe (row) list, then reads each file as one sample (column). The
 * sample name is taken from the second tab-separated field of the first line
 * of each file. Missing or unusable measurements are stored as {@code -999}.
 *
 * <p>I/O failures are reported on standard output and terminate the JVM with
 * exit code {@code -1}; this matches the long-standing behaviour of this
 * class and is kept so that callers never receive a partially filled matrix.
 */
public class ParseTcgaMethylationFile {
    private static final Pattern SPLIT_ON_TAB = Pattern.compile("\\t");
    protected static final String ENCODING = "ISO-8859-1";

    /** Buffer size handed to every {@link BufferedReader} opened by this class. */
    private static final int READ_BUFFER_SIZE = 8096;
    /** Sentinel stored in the matrices for missing or unusable measurements. */
    private static final double MISSING_VALUE = -999.0;
    /** Constant added to the denominator when the non-TCGA beta formula is used. */
    private static final double BETA_OFFSET = 100.0;
    /** Normalised (lower case, spaces replaced by underscores) header of the methylated-signal column. */
    private static final String HEADER_METHYLATED = "methylated_signal_intensity_(m)";
    /** Normalised (lower case, spaces replaced by underscores) header of the un-methylated-signal column. */
    private static final String HEADER_UNMETHYLATED = "un-methylated_signal_intensity_(u)";

    /**
     * Parses level-1 files into a single matrix of beta values.
     *
     * <p>The methylated and un-methylated signal columns are located by their
     * header names on the second line of each file.
     *
     * @param fileInputFolder  folder containing the per-sample {@code .txt} files
     * @param printToFile      when {@code true} the matrix is saved to
     *                         {@code fileOutputFolder + "/TCGADataBeta.txt"}
     * @param fileOutputFolder folder used when {@code printToFile} is set
     * @param TcgaMethod       {@code true} for {@code M / (M + U)}, {@code false}
     *                         for {@code M / (M + U + 100)}
     * @return probes-by-samples matrix of beta values
     * @throws IllegalArgumentException if the folder cannot be listed or holds no {@code .txt} file
     * @throws IllegalStateException    if a file lacks one of the two signal columns
     */
    public static DoubleMatrixDataset<String, String> parseTCGAData_lvl1(String fileInputFolder, boolean printToFile, String fileOutputFolder, boolean TcgaMethod) {
        List<File> sampleFiles = listTxtFiles(fileInputFolder);
        int nrSamples = sampleFiles.size();
        List<String> probes = readProbeIds(sampleFiles.get(0));
        int nrProbes = probes.size();
        System.out.println(nrProbes);

        DoubleMatrixDataset<String, String> beta = new DoubleMatrixDataset<String, String>(nrProbes, nrSamples);
        for (int p = 0; p < nrProbes; ++p) {
            beta.rowObjects.set(p, probes.get(p));
        }

        for (int f = 0; f < nrSamples; ++f) {
            File currentFile = sampleFiles.get(f);
            try (BufferedReader in = openReader(currentFile)) {
                String[] data = SPLIT_ON_TAB.split(readRequiredLine(in, currentFile));
                beta.colObjects.set(f, data[1]);
                int[] signalColumns = findSignalColumns(readRequiredLine(in, currentFile), currentFile);
                int columnM = signalColumns[0];
                int columnU = signalColumns[1];

                String str;
                int p = 0;
                while ((str = in.readLine()) != null) {
                    data = SPLIT_ON_TAB.split(str);
                    double methylatedSignal = parseSignal(data[columnM]);
                    double unmethylatedSignal = parseSignal(data[columnU]);
                    beta.rawData[p][f] = computeBeta(methylatedSignal, unmethylatedSignal, TcgaMethod);
                    reportProbeMismatch(f, data[0], probes, p);
                    ++p;
                }
            }
            catch (IOException e) {
                System.out.println(e.getMessage());
                System.exit(-1);
            }
        }

        beta.recalculateHashMaps();
        if (printToFile) {
            saveDataset(beta, fileOutputFolder + "/TCGADataBeta.txt");
        }
        return beta;
    }

    /**
     * Parses level-1 files but keeps only the probes that are keys of
     * {@code probeList}, producing a matrix of beta values.
     *
     * <p>Unlike the other level-1 parsers this method does not look up the
     * signal columns by header name: it reads the methylated signal from
     * field 1 and the un-methylated signal from field 2 of every data line.
     * Rows are matched by probe ID rather than by line position, and reading
     * of a file stops as soon as as many matching lines have been seen as
     * there are retained probes.
     *
     * @param fileInputFolder  folder containing the per-sample {@code .txt} files
     * @param printToFile      when {@code true} the matrix is saved to
     *                         {@code fileOutputFolder + "TCGA_450K-27K_DataBeta.txt"}
     * @param fileOutputFolder prefix used when {@code printToFile} is set
     * @param TcgaMethod       {@code true} for {@code M / (M + U)}, {@code false}
     *                         for {@code M / (M + U + 100)}
     * @param probeList        probes to retain; only the key set is consulted and
     *                         its size determines the number of rows allocated
     * @return matrix with {@code probeList.size()} rows and one column per sample
     * @throws IllegalArgumentException if the folder cannot be listed or holds no {@code .txt} file
     */
    public static DoubleMatrixDataset<String, String> parseTCGAData450As27K_lvl1(String fileInputFolder, boolean printToFile, String fileOutputFolder, boolean TcgaMethod, HashMap<String, Boolean> probeList) {
        List<File> sampleFiles = listTxtFiles(fileInputFolder);
        int nrSamples = sampleFiles.size();
        List<String> probes = readProbeIds(sampleFiles.get(0));
        System.out.println(probes.size() + "\t" + probeList.size());

        DoubleMatrixDataset<String, String> beta = new DoubleMatrixDataset<String, String>(probeList.size(), nrSamples);
        // Maps each retained probe ID to its row in the output matrix, in first-file order.
        HashMap<String, Integer> probeIndex = new HashMap<>();
        int position = 0;
        for (String probe : probes) {
            if (!probeList.containsKey(probe)) {
                continue;
            }
            beta.rowObjects.set(position, probe);
            probeIndex.put(probe, position);
            ++position;
        }

        int columnM = 1;
        int columnU = 2;
        for (int f = 0; f < nrSamples; ++f) {
            String currentFile = sampleFiles.get(f).getAbsolutePath();
            System.out.println("Processing:\t" + f + "\t" + currentFile);
            try {
                TextFile in = new TextFile(currentFile, false);
                try {
                    String str = in.readLine();
                    if (str == null) {
                        throw new IOException("Unexpected end of file in " + currentFile);
                    }
                    String[] data = SPLIT_ON_TAB.split(str);
                    beta.colObjects.set(f, data[1]);
                    in.readLine(); // column header line; positions are fixed for this parser
                    int nrSet = 0;
                    while ((str = in.readLine()) != null && nrSet != probeIndex.size()) {
                        data = SPLIT_ON_TAB.split(str);
                        Integer row = probeIndex.get(data[0]);
                        if (row == null) {
                            continue;
                        }
                        ++nrSet;
                        double methylatedSignal = parseSignal(data[columnM]);
                        double unmethylatedSignal = parseSignal(data[columnU]);
                        beta.rawData[row][f] = computeBeta(methylatedSignal, unmethylatedSignal, TcgaMethod);
                    }
                }
                finally {
                    // Close on every path so a malformed line does not leak the handle.
                    in.close();
                }
            }
            catch (IOException e) {
                System.out.println(e.getMessage());
                System.exit(-1);
            }
        }

        beta.recalculateHashMaps();
        if (printToFile) {
            // NOTE(review): unlike the sibling parsers no "/" is inserted here, so
            // fileOutputFolder acts as a raw prefix - confirm this is intended.
            saveDataset(beta, fileOutputFolder + "TCGA_450K-27K_DataBeta.txt");
        }
        return beta;
    }

    /**
     * Parses level-1 files into three matrices: methylated signal,
     * un-methylated signal and the derived beta values.
     *
     * <p>The signal columns are located by their header names on the second
     * line of each file. Signals reported as {@code NA} or {@code NaN} are
     * stored as {@code -999} in the two signal matrices.
     *
     * @param fileInputFolder  folder containing the per-sample {@code .txt} files
     * @param printToFile      when {@code true} the matrices are saved as
     *                         {@code /TCGADataM.txt}, {@code /TCGADataU.txt} and
     *                         {@code /TCGADataBeta.txt} under {@code fileOutputFolder}
     * @param fileOutputFolder folder used when {@code printToFile} is set
     * @param TcgaMethod       {@code true} for {@code M / (M + U)}, {@code false}
     *                         for {@code M / (M + U + 100)}
     * @return list holding, in this order, the methylated, un-methylated and beta matrices
     * @throws IllegalArgumentException if the folder cannot be listed or holds no {@code .txt} file
     * @throws IllegalStateException    if a file lacks one of the two signal columns
     */
    public static ArrayList<DoubleMatrixDataset<String, String>> parseTCGAData_lvl1_all_matrices(String fileInputFolder, boolean printToFile, String fileOutputFolder, boolean TcgaMethod) {
        List<File> sampleFiles = listTxtFiles(fileInputFolder);
        int nrSamples = sampleFiles.size();
        List<String> probes = readProbeIds(sampleFiles.get(0));
        int nrProbes = probes.size();
        System.out.println(nrProbes);

        DoubleMatrixDataset<String, String> methylated = new DoubleMatrixDataset<String, String>(nrProbes, nrSamples);
        DoubleMatrixDataset<String, String> unmethylated = new DoubleMatrixDataset<String, String>(nrProbes, nrSamples);
        DoubleMatrixDataset<String, String> beta = new DoubleMatrixDataset<String, String>(nrProbes, nrSamples);
        for (int p = 0; p < nrProbes; ++p) {
            String probe = probes.get(p);
            methylated.rowObjects.set(p, probe);
            unmethylated.rowObjects.set(p, probe);
            beta.rowObjects.set(p, probe);
        }

        for (int f = 0; f < nrSamples; ++f) {
            File currentFile = sampleFiles.get(f);
            System.out.println("Processing:\t" + f + "\t" + currentFile.getAbsolutePath());
            try (BufferedReader in = openReader(currentFile)) {
                String[] data = SPLIT_ON_TAB.split(readRequiredLine(in, currentFile));
                methylated.colObjects.set(f, data[1]);
                unmethylated.colObjects.set(f, data[1]);
                beta.colObjects.set(f, data[1]);
                int[] signalColumns = findSignalColumns(readRequiredLine(in, currentFile), currentFile);
                int columnM = signalColumns[0];
                int columnU = signalColumns[1];

                String str;
                int p = 0;
                while ((str = in.readLine()) != null) {
                    data = SPLIT_ON_TAB.split(str);
                    double methylatedSignal = parseSignal(data[columnM]);
                    double unmethylatedSignal = parseSignal(data[columnU]);
                    methylated.rawData[p][f] = methylatedSignal;
                    unmethylated.rawData[p][f] = unmethylatedSignal;
                    beta.rawData[p][f] = computeBeta(methylatedSignal, unmethylatedSignal, TcgaMethod);
                    reportProbeMismatch(f, data[0], probes, p);
                    ++p;
                }
            }
            catch (IOException e) {
                System.out.println(e.getMessage());
                System.exit(-1);
            }
        }

        methylated.recalculateHashMaps();
        unmethylated.recalculateHashMaps();
        beta.recalculateHashMaps();
        if (printToFile) {
            // Each save is attempted independently; one failure does not skip the others.
            saveDataset(methylated, fileOutputFolder + "/TCGADataM.txt");
            saveDataset(unmethylated, fileOutputFolder + "/TCGADataU.txt");
            saveDataset(beta, fileOutputFolder + "/TCGADataBeta.txt");
        }

        ArrayList<DoubleMatrixDataset<String, String>> matrices = new ArrayList<>(3);
        matrices.add(methylated);
        matrices.add(unmethylated);
        matrices.add(beta);
        return matrices;
    }

    /**
     * Parses level-3 files, storing the value found in the second field of
     * every data line.
     *
     * <p>A value of {@code NA} is stored as {@code -999}; every other value is
     * passed to {@link Double#parseDouble(String)} unchanged. The column names
     * of the resulting dataset are printed to standard output before returning.
     *
     * @param fileInputFolder folder containing the per-sample {@code .txt} files
     * @param nrProbes        number of rows to allocate in the returned matrix.
     *                        NOTE(review): this is not checked against the number
     *                        of probes actually found in the first file - confirm
     *                        callers always pass a matching value
     * @param printToFile     when {@code true} the matrix is saved to {@code fileOut}
     * @param fileOut         output path used when {@code printToFile} is set
     * @return matrix with {@code nrProbes} rows and one column per sample
     * @throws IllegalArgumentException if the folder cannot be listed or holds no {@code .txt} file
     */
    public static DoubleMatrixDataset<String, String> parseTCGAData_lvl3(String fileInputFolder, int nrProbes, boolean printToFile, String fileOut) {
        List<File> sampleFiles = listTxtFiles(fileInputFolder);
        int nrSamples = sampleFiles.size();
        List<String> probes = readProbeIds(sampleFiles.get(0));

        DoubleMatrixDataset<String, String> values = new DoubleMatrixDataset<String, String>(nrProbes, nrSamples);
        for (int p = 0; p < probes.size(); ++p) {
            values.rowObjects.set(p, probes.get(p));
        }

        for (int f = 0; f < nrSamples; ++f) {
            File currentFile = sampleFiles.get(f);
            System.out.println("Processing:\t" + f + "\t" + currentFile.getAbsolutePath());
            try (BufferedReader in = openReader(currentFile)) {
                String[] data = SPLIT_ON_TAB.split(readRequiredLine(in, currentFile));
                values.colObjects.set(f, data[1]);
                in.readLine(); // second header line carries nothing this parser needs

                String str;
                int p = 0;
                while ((str = in.readLine()) != null) {
                    data = SPLIT_ON_TAB.split(str);
                    // The probe list is already complete from the first pass, so
                    // nothing is appended to it while reading file 0.
                    values.rawData[p][f] = "NA".equals(data[1]) ? MISSING_VALUE : Double.parseDouble(data[1]);
                    reportProbeMismatch(f, data[0], probes, p);
                    ++p;
                }
            }
            catch (IOException e) {
                System.out.println(e.getMessage());
                System.exit(-1);
            }
        }

        values.recalculateHashMaps();
        if (printToFile) {
            saveDataset(values, fileOut);
        }
        System.out.println(values.colObjects.toString());
        return values;
    }

    /**
     * Lists the files in {@code folder} whose absolute path ends in
     * {@code .txt}, in the order reported by the file system, and prints how
     * many were found.
     *
     * @throws IllegalArgumentException if the folder cannot be listed or holds no {@code .txt} file
     */
    private static List<File> listTxtFiles(String folder) {
        File[] entries = new File(folder).listFiles();
        if (entries == null) {
            // listFiles() returns null for a missing path, a non-directory or an I/O error.
            throw new IllegalArgumentException("Cannot list files in input folder: " + folder);
        }
        List<File> txtFiles = new ArrayList<>();
        for (File entry : entries) {
            if (entry.getAbsolutePath().endsWith(".txt")) {
                txtFiles.add(entry);
            }
        }
        System.out.println("Files to parse:\t" + txtFiles.size());
        if (txtFiles.isEmpty()) {
            throw new IllegalArgumentException("No .txt files found in input folder: " + folder);
        }
        return txtFiles;
    }

    /** Opens {@code file} for buffered reading using {@link #ENCODING}. */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING), READ_BUFFER_SIZE);
    }

    /** Reads one line and fails with an {@link IOException} if the file has already ended. */
    private static String readRequiredLine(BufferedReader in, File source) throws IOException {
        String line = in.readLine();
        if (line == null) {
            throw new IOException("Unexpected end of file in " + source.getAbsolutePath());
        }
        return line;
    }

    /**
     * Skips the first two lines of {@code file} and returns the first
     * tab-separated field of every remaining line. An I/O failure is printed
     * and terminates the JVM.
     */
    private static List<String> readProbeIds(File file) {
        List<String> probeIds = new ArrayList<>();
        try (BufferedReader in = openReader(file)) {
            in.readLine();
            in.readLine();
            String line;
            while ((line = in.readLine()) != null) {
                probeIds.add(SPLIT_ON_TAB.split(line)[0]);
            }
        }
        catch (IOException e) {
            System.out.println(e.getMessage());
            System.exit(-1);
        }
        return probeIds;
    }

    /**
     * Locates the methylated and un-methylated signal columns in a header line.
     * Header names are compared after lower-casing, trimming and replacing
     * spaces with underscores; if a name occurs more than once the last
     * occurrence wins.
     *
     * @return two-element array: index of the methylated column, then of the un-methylated column
     * @throws IllegalStateException if either column is absent
     */
    private static int[] findSignalColumns(String headerLine, File source) {
        String[] headers = SPLIT_ON_TAB.split(headerLine);
        int columnM = -1;
        int columnU = -1;
        for (int d = 0; d < headers.length; ++d) {
            // Locale.ROOT keeps the match independent of the JVM's default locale.
            String normalised = headers[d].toLowerCase(Locale.ROOT).trim().replace(" ", "_");
            if (HEADER_METHYLATED.equals(normalised)) {
                columnM = d;
            } else if (HEADER_UNMETHYLATED.equals(normalised)) {
                columnU = d;
            }
        }
        if (columnM < 0 || columnU < 0) {
            throw new IllegalStateException("Methylated/un-methylated signal columns not found in header of " + source.getAbsolutePath());
        }
        return new int[]{columnM, columnU};
    }

    /** Parses a signal field, mapping {@code NA} and {@code NaN} to the missing-value sentinel. */
    private static double parseSignal(String field) {
        if ("NA".equals(field) || "NaN".equals(field)) {
            return MISSING_VALUE;
        }
        return Double.parseDouble(field);
    }

    /**
     * Derives the beta value from a pair of signals.
     *
     * <ul>
     * <li>either signal missing: {@code -999}</li>
     * <li>both signals exactly zero: {@code 0}</li>
     * <li>either signal zero or negative: {@code -999}</li>
     * <li>otherwise {@code M / (M + U)} when {@code tcgaMethod} is set, else
     *     {@code M / (M + U + 100)}</li>
     * </ul>
     */
    private static double computeBeta(double methylatedSignal, double unmethylatedSignal, boolean tcgaMethod) {
        if (methylatedSignal == MISSING_VALUE || unmethylatedSignal == MISSING_VALUE) {
            return MISSING_VALUE;
        }
        if (methylatedSignal == 0.0 && unmethylatedSignal == 0.0) {
            return 0.0;
        }
        if (methylatedSignal <= 0.0 || unmethylatedSignal <= 0.0) {
            return MISSING_VALUE;
        }
        double total = methylatedSignal + unmethylatedSignal;
        return tcgaMethod ? methylatedSignal / total : methylatedSignal / (total + BETA_OFFSET);
    }

    /**
     * Prints an error line when the probe ID found on data row {@code row} of
     * file number {@code fileIndex} differs from the one the first file had on
     * that row. Rows beyond the end of the reference list are reported against
     * {@code null}.
     */
    private static void reportProbeMismatch(int fileIndex, String foundProbe, List<String> probes, int row) {
        String expectedProbe = row < probes.size() ? probes.get(row) : null;
        if (!foundProbe.equals(expectedProbe)) {
            System.out.println("Error!:\t" + fileIndex + "\t" + foundProbe + "\t" + expectedProbe);
        }
    }

    /** Saves {@code dataset} to {@code path}; a failure is logged at SEVERE level and otherwise ignored. */
    private static void saveDataset(DoubleMatrixDataset<String, String> dataset, String path) {
        try {
            dataset.save(path);
        }
        catch (IOException ex) {
            Logger.getLogger(ParseTcgaMethylationFile.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
}

