/*
 * Decompiled with CFR 0.152.
 */
package umcg.genetica.io.geofiles;

import cern.colt.matrix.tdouble.DoubleMatrix2D;
import cern.colt.matrix.tdouble.impl.DenseDoubleMatrix2D;
import cern.colt.matrix.tdouble.impl.DenseLargeDoubleMatrix2D;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.lang.math.NumberUtils;
import umcg.genetica.io.text.TextFile;
import umcg.genetica.math.matrix2.DoubleMatrixDataset;

/**
 * Parser for tab-separated expression tables that loads the numeric sample
 * columns of a file into a {@link DoubleMatrixDataset}.
 *
 * <p>The parser skips a preamble of comment/banner lines, treats the next line
 * as the header, locates the identifier column from the header names, keeps
 * every remaining column whose name is not on the exclusion list and whose
 * first data value is numeric, and finally reads all data rows into a matrix.
 */
public class ParseTextTable {
    private static final Pattern SPLIT_ON_TAB = Pattern.compile("\\t");
    /** Lines consisting only of whitespace (this also covers the empty line). */
    private static final Pattern BLANK_LINE = Pattern.compile("^\\s*$");
    /** Preamble lines that start with a GSM accession. */
    private static final Pattern GSM_LINE = Pattern.compile("^GSM[0-9]+.*");
    /** Cell count below which the regular (single-array) dense matrix is used. */
    private static final long MAX_DENSE_CELLS = 0x7FFFFFFDL;
    protected static final String ENCODING = "ISO-8859-1";
    // NOTE(review): the logger is named after DoubleMatrixDataset rather than this class;
    // left unchanged so existing logging configuration keeps matching.
    static final Logger LOGGER = Logger.getLogger(DoubleMatrixDataset.class.getName());

    /**
     * Parses a tab-separated table into a dataset of doubles.
     *
     * <p>The file is read twice: the first pass inspects the header and the
     * first data row and counts the remaining lines, the second pass fills the
     * matrix. Data rows whose number of tab-separated fields differs from the
     * header are skipped. Values that cannot be parsed as a double are stored
     * as {@code NaN} and a warning is logged once at the end.
     *
     * @param fileInput path of the file to parse
     * @param debug     when {@code true}, diagnostic details about rejected
     *                  columns are printed to standard output
     * @return the parsed dataset, or {@code null} when the file contains no
     *         header row or no usable numeric column
     * @throws IOException if reading fails, or if a duplicated column or row
     *                     name is encountered
     */
    public static DoubleMatrixDataset<String, String> parseGeoTables(String fileInput, boolean debug) throws IOException {
        System.out.println("\tNow parsing file: " + fileInput);
        LinkedHashSet<Integer> desiredColPos = new LinkedHashSet<Integer>();
        LinkedHashMap<String, Integer> colMap;
        String[] headerData;
        int columnOffset;
        int rowOffset = 1;
        int storedCols = 0;
        int tmpRows = 1;

        TextFile in = new TextFile(fileInput, false);
        // First pass: header inspection and row counting. The finally block makes sure the
        // file is released on the early "nothing usable" returns and on exceptions as well.
        try {
            String str = in.readLine();
            while (str != null && ParseTextTable.isPreambleLine(str)) {
                str = in.readLine();
                ++rowOffset;
            }
            if (str == null) {
                // Empty file, or a file that consists of preamble lines only.
                LOGGER.warning("No header row found in file: " + fileInput);
                return null;
            }
            headerData = SPLIT_ON_TAB.split(str);
            String str2 = in.readLine();
            // A header without any data row yields no numeric columns and ends in the null return below.
            String[] nextRowData = str2 == null ? new String[0] : SPLIT_ON_TAB.split(str2);

            columnOffset = ParseTextTable.findColumnOffset(headerData);
            int tmpCols = headerData.length - columnOffset;
            colMap = new LinkedHashMap<String, Integer>((int)Math.ceil((double)tmpCols / 0.75));
            for (int s = 0; s < tmpCols; ++s) {
                int pos = s + columnOffset;
                String colName = headerData[pos];
                if (colMap.containsKey(colName)) {
                    LOGGER.warning("Duplicated column name:" + colName + "! In file: " + fileInput);
                    throw new IOException("Problem with parsing file");
                }
                if (ParseTextTable.isExcludedColumn(colName)) {
                    if (debug) {
                        ParseTextTable.printExclusionDetails(colName);
                    }
                    continue;
                }
                if (nextRowData.length <= pos) {
                    // The first data row is too short to tell whether this column is numeric.
                    continue;
                }
                if (ParseTextTable.isNumeric(nextRowData[pos])) {
                    colMap.put(colName, storedCols);
                    desiredColPos.add(pos);
                    ++storedCols;
                } else if (debug) {
                    System.out.println("In non-numeric, entry: " + nextRowData[pos]);
                    System.out.println("###############################");
                }
            }
            if (colMap.size() == 0) {
                if (debug) {
                    System.out.println("#Nothing added for this file: " + fileInput + ". First two rows:");
                    // Guard the index: the first data row may be shorter than the header (or absent).
                    boolean firstValueNumeric = nextRowData.length > columnOffset && ParseTextTable.isNumeric(nextRowData[columnOffset]);
                    System.out.println("Parsing of values oke? " + firstValueNumeric);
                    System.out.println(str);
                    System.out.println(str2);
                    System.out.println(fileInput);
                }
                return null;
            }
            // tmpRows starts at 1 for the data row already consumed above.
            while (in.readLine() != null) {
                ++tmpRows;
            }
        } finally {
            in.close();
        }

        double[][] initialMatrix = new double[tmpRows][storedCols];
        LinkedHashMap<String, Integer> rowMap = new LinkedHashMap<String, Integer>((int)Math.ceil((double)tmpRows / 0.75));
        int row = 0;
        boolean correctData = true;

        // Second pass: re-open the file, skip preamble plus header, then fill the matrix.
        in.open();
        try {
            for (int i = 0; i < rowOffset; ++i) {
                in.readLine();
            }
            String line;
            while ((line = in.readLine()) != null) {
                String[] data = SPLIT_ON_TAB.split(line);
                if (data.length != headerData.length) {
                    // Rows that do not line up with the header are skipped.
                    continue;
                }
                String rowName = data[columnOffset - 1];
                if (rowMap.containsKey(rowName)) {
                    LOGGER.warning("Duplicated row name: " + rowName);
                    System.out.println(line);
                    throw new IOException("Problem in reading file.");
                }
                rowMap.put(rowName, row);
                int columnToPut = 0;
                for (int pos : desiredColPos) {
                    double value;
                    try {
                        value = Double.parseDouble(data[pos]);
                    } catch (NumberFormatException e) {
                        // Unparseable cells are kept as NaN; reported once after the loop.
                        correctData = false;
                        value = Double.NaN;
                    }
                    initialMatrix[row][columnToPut] = value;
                    ++columnToPut;
                }
                ++row;
            }
        } finally {
            in.close();
        }
        if (!correctData) {
            LOGGER.warning("Your data contains NaN/unparseable values!");
        }

        // Skipped rows leave unused slots at the end of the buffer; drop them so the matrix
        // has exactly one row per entry in rowMap.
        double[][] values = initialMatrix;
        if (row < tmpRows) {
            values = new double[row][];
            System.arraycopy(initialMatrix, 0, values, 0, row);
        }

        // Size the matrix by what was actually stored, and multiply in long so that very
        // large tables cannot wrap around and select the wrong implementation.
        DoubleMatrix2D mat;
        if ((long)row * (long)storedCols < MAX_DENSE_CELLS) {
            mat = new DenseDoubleMatrix2D(row, storedCols);
        } else {
            mat = new DenseLargeDoubleMatrix2D(row, storedCols);
        }
        mat.assign(values);

        DoubleMatrixDataset<String, String> dataset = new DoubleMatrixDataset<String, String>(mat, rowMap, colMap);
        LOGGER.log(Level.INFO, "''{0}'' has been loaded, nrRows: {1} nrCols: {2}", new Object[]{fileInput, dataset.rows(), dataset.columns()});
        return dataset;
    }

    /**
     * Tells whether the given string is a number according to
     * {@link NumberUtils#isNumber(String)}.
     *
     * @param str the text to test
     * @return {@code true} if the text is recognised as a number
     */
    public static boolean isNumeric(String str) {
        return NumberUtils.isNumber(str);
    }

    /** Returns true for comment, blank and banner lines that precede the header row. */
    private static boolean isPreambleLine(String line) {
        return line.startsWith("#")
                || line.startsWith("\"#")
                || BLANK_LINE.matcher(line).matches()
                || line.startsWith("This is our raw data.")
                || GSM_LINE.matcher(line).matches()
                || line.startsWith("Illumina Inc. GenomeStudio")
                || line.startsWith("Array Content =")
                || line.startsWith("Normalization =")
                || line.startsWith("log");
    }

    /**
     * Returns the index of the first value column: one past the first header cell that looks
     * like an identifier column (probe/ref + id, or array + address), or 1 if none matches.
     */
    private static int findColumnOffset(String[] headerData) {
        for (int s = 0; s < headerData.length; ++s) {
            String lower = headerData[s].toLowerCase();
            boolean idColumn = (lower.contains("probe") || lower.contains("ref")) && lower.contains("id");
            boolean addressColumn = lower.contains("array") && lower.contains("address");
            if (idColumn || addressColumn) {
                return s + 1;
            }
        }
        return 1;
    }

    /** Returns true for header names that are never loaded (empty, ID, p-value, detection, array, bead columns). */
    private static boolean isExcludedColumn(String colName) {
        if (colName.equals("")
                || colName.equalsIgnoreCase("Target ID")
                || colName.equalsIgnoreCase("TargetID")
                || colName.equalsIgnoreCase("Probe ID")) {
            return true;
        }
        String lower = colName.toLowerCase();
        return lower.contains(".p=")
                || lower.contains("pval")
                || lower.contains("detection")
                || lower.contains("p-val")
                || lower.contains("array")
                || lower.contains("bead");
    }

    /** Prints, for debugging, which exclusion rules the given header name triggers. */
    private static void printExclusionDetails(String colName) {
        String lower = colName.toLowerCase();
        System.out.println("Empthy colname:" + colName.equals(""));
        System.out.println("Colname contains \"target id\":" + colName.equalsIgnoreCase("Target ID"));
        System.out.println("Colname contains \"probe id\":" + colName.equalsIgnoreCase("Probe ID"));
        System.out.println("Colname contains \".p\":" + lower.contains(".p="));
        System.out.println("Colname contains \"pval\":" + lower.contains("pval"));
        System.out.println("Colname contains \"p-val\":" + lower.contains("p-val"));
        System.out.println("Colname contains \"detection\":" + lower.contains("detection"));
        System.out.println("Colname contains \"array\":" + lower.contains("array"));
        System.out.println("Colname contains \"bead\":" + lower.contains("bead"));
        System.out.println("###############################");
    }
}

