/*
 * Decompiled with CFR 0.152.
 */
package net.sf.picard.sam;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.sam.AbstractDuplicateFindingAlgorithm;
import net.sf.picard.sam.DuplicationMetrics;
import net.sf.picard.util.Histogram;
import net.sf.picard.util.Log;
import net.sf.picard.util.PeekableIterator;
import net.sf.picard.util.ProgressLogger;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.SortingCollection;
import net.sf.samtools.util.StringUtil;

public class EstimateLibraryComplexity
extends AbstractDuplicateFindingAlgorithm {
    // Usage text attached to the program via the @Usage annotation.
    @Usage
    public final String USAGE = "Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads by the first N bases (5 by default) of each read and then comparing reads with the first N bases identical to each other for duplicates.  Reads are considered to be duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE (0.03 by default).\n\nReads of poor quality are filtered out so as to provide a more accurate estimate. The filtering removes reads with any no-calls in the first N bases or with a mean base quality lower than MIN_MEAN_QUALITY across either the first or second read.\n\nUnpaired reads are ignored in this computation.\n\nThe algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation of library size. Also, since there is no alignment to screen out technical reads one further filter is applied on the data.  After examining all reads a histogram is built of [#reads in duplicate set -> #of duplicate sets]; all bins that contain exactly one duplicate set are then removed from the histogram as outliers before library size is estimated.";
    // Input files; doWork() asserts each is readable and then opens each with SAMFileReader.
    @Option(shortName="I", doc="One or more files to combine and estimate library complexity from. Reads can be mapped or unmapped.")
    public List<File> INPUT;
    // Destination passed to MetricsFile.write() at the end of doWork().
    @Option(shortName="O", doc="Output file to writes per-library metrics to.")
    public File OUTPUT;
    // Prefix length (per read) used by PairedReadComparator and getNextGroup() to bucket read pairs,
    // by passesQualityCheck() as the no-call window, and by matches() as the offset at which
    // mismatch counting starts.
    // NOTE(review): the doc text below says "max_id_bases"; it presumably refers to this option.
    @Option(doc="The minimum number of bases at the starts of reads that must be identical for reads to be grouped together for duplicate detection.  In effect total_reads / 4^max_id_bases reads will be compared at a time, so lower numbers will produce more accurate results but consume exponentially more memory and CPU.")
    public int MIN_IDENTICAL_BASES = 5;
    // Passed to matches() as the allowed mismatch fraction over the compared length of both reads.
    @Option(doc="The maximum rate of differences between two reads to call them identical.")
    public double MAX_DIFF_RATE = 0.03;
    // Passed to passesQualityCheck() as the mean-quality threshold applied to each read.
    @Option(doc="The minimum mean quality of the bases in a read pair for the read to be analyzed. Reads with lower average quality are filtered out and not considered in any calculations.")
    public int MIN_MEAN_QUALITY = 20;
    // Multiplied by the computed mean group size in doWork(); groups larger than the product are skipped.
    @Option(doc="Do not process self-similar groups that are this many times over the mean expected group size. I.e. if the input contains 10m read pairs and MIN_IDENTICAL_BASES is set to 5, then the mean expected group size would be approximately 10 reads.")
    public int MAX_GROUP_RATIO = 500;
    // Logger used for progress, warnings and informational messages in doWork().
    private final Log log = Log.getInstance(EstimateLibraryComplexity.class);

    /**
     * Command-line entry point.
     *
     * Creates a fresh program instance and passes the raw arguments to the inherited
     * {@code instanceMainWithExit} (not visible in this file; presumably it parses the
     * options, runs {@link #doWork()} and terminates the JVM with the resulting status).
     *
     * @param stringArray the unparsed command-line arguments
     */
    public static void main(String[] stringArray) {
        final EstimateLibraryComplexity program = new EstimateLibraryComplexity();
        program.instanceMainWithExit(stringArray);
    }

    /**
     * Runs the whole estimation: reads every input file, pairs up mates by read name,
     * sorts the surviving read pairs by sequence prefix, scans each same-prefix group
     * for duplicates, and writes one {@link DuplicationMetrics} row plus one histogram
     * per library to {@link #OUTPUT}.
     *
     * <p>Steps:
     * <ol>
     *   <li>Every paired first/second-of-pair record is attached to a
     *       {@link PairedReadSequence} keyed by read name. Once both mates are present
     *       and both passed {@link #passesQualityCheck}, the pair goes into a
     *       {@link SortingCollection}.</li>
     *   <li>The sorted pairs are consumed group by group (see {@link #getNextGroup}).
     *       Groups larger than {@code meanGroupSize * MAX_GROUP_RATIO} are skipped with
     *       a warning. The rest are split by library and compared all-against-all with
     *       {@link #matches}.</li>
     *   <li>For each library, histogram bins holding more than one duplicate set are
     *       folded into the metrics; bins with a single set are ignored.</li>
     * </ol>
     *
     * @return always 0
     */
    @Override
    protected int doWork() {
        for (final File input : this.INPUT) {
            IoUtil.assertFileIsReadable(input);
        }

        // Budget half of the maximum heap for buffered read pairs. The arithmetic is done in long and
        // clamped so that a very large (or unbounded) heap cannot wrap around when narrowed to int.
        final long pairsThatFit = Runtime.getRuntime().maxMemory() / (long) PairedReadSequence.size_in_bytes / 2L;
        final int maxRecordsInRam = (int) Math.min((long) Integer.MAX_VALUE, pairsThatFit);
        this.log.info("Will store " + maxRecordsInRam + " read pairs in memory before sorting.");

        final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
        // Number of paired records seen; needed later to derive the expected mean group size.
        long recordsRead = 0L;
        final SortingCollection<PairedReadSequence> sorter = SortingCollection.newInstance(PairedReadSequence.class, new PairedReadCodec(), new PairedReadComparator(), maxRecordsInRam, this.TMP_DIR);
        final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Read");

        for (final File input : this.INPUT) {
            // Mates waiting for their partner, keyed by read name; scoped to a single input file.
            final Map<String, PairedReadSequence> pendingByName = new HashMap<String, PairedReadSequence>();
            final SAMFileReader reader = new SAMFileReader(input);
            try {
                readGroups.addAll(reader.getFileHeader().getReadGroups());
                for (final SAMRecord rec : reader) {
                    if (!rec.getReadPairedFlag()) {
                        continue;
                    }
                    if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) {
                        continue;
                    }

                    // Removing on lookup means a completed pair no longer occupies the pending map.
                    PairedReadSequence pair = pendingByName.remove(rec.getReadName());
                    if (pair == null) {
                        pair = new PairedReadSequence();
                        // The read group is only recorded when location information could be added.
                        if (this.addLocationInformation(rec.getReadName(), pair)) {
                            final SAMReadGroupRecord readGroup = rec.getReadGroup();
                            if (readGroup != null) {
                                pair.setReadGroup((short) readGroups.indexOf(readGroup));
                            }
                        }
                        pendingByName.put(rec.getReadName(), pair);
                    }

                    // A pair is usable only if both of its reads pass the quality check.
                    final boolean readPasses = this.passesQualityCheck(rec.getReadBases(), rec.getBaseQualities(), this.MIN_IDENTICAL_BASES, this.MIN_MEAN_QUALITY);
                    pair.qualityOk = pair.qualityOk && readPasses;

                    // Reverse-strand reads are reverse-complemented before being stored.
                    final byte[] bases = rec.getReadBases();
                    if (rec.getReadNegativeStrandFlag()) {
                        SequenceUtil.reverseComplement(bases);
                    }
                    if (rec.getFirstOfPairFlag()) {
                        pair.read1 = bases;
                    } else {
                        pair.read2 = bases;
                    }

                    if (pair.read1 != null && pair.read2 != null && pair.qualityOk) {
                        sorter.add(pair);
                    }
                    ++recordsRead;
                    progress.record(rec);
                }
            } finally {
                reader.close();
            }
        }

        this.log.info("Finished reading - moving on to scanning for duplicates.");
        final PeekableIterator<PairedReadSequence> iterator = new PeekableIterator<PairedReadSequence>(sorter.iterator());
        final Map<String, Histogram<Integer>> duplicationHistosByLibrary = new HashMap<String, Histogram<Integer>>();
        final Map<String, Histogram<Integer>> opticalHistosByLibrary = new HashMap<String, Histogram<Integer>>();
        int groupsProcessed = 0;
        long lastLogTime = System.currentTimeMillis();

        // Expected pairs per group = pairs / 4^(prefix bases over both reads), never less than 1.
        final long distinctPrefixes = Math.max(1L, (long) Math.pow(4.0, this.MIN_IDENTICAL_BASES * 2));
        final long meanGroupSize = Math.max(1L, recordsRead / 2L / distinctPrefixes);

        while (iterator.hasNext()) {
            final List<PairedReadSequence> group = this.getNextGroup(iterator);

            if ((long) group.size() > meanGroupSize * (long) this.MAX_GROUP_RATIO) {
                final PairedReadSequence first = group.get(0);
                this.log.warn("Omitting group with over " + this.MAX_GROUP_RATIO + " times the expected mean number of read pairs. " + "Mean=" + meanGroupSize + ", Actual=" + group.size() + ". Prefixes: " + StringUtil.bytesToString(first.read1, 0, this.MIN_IDENTICAL_BASES) + " / " + StringUtil.bytesToString(first.read2, 0, this.MIN_IDENTICAL_BASES));
                continue;
            }

            final Map<String, List<PairedReadSequence>> byLibrary = this.splitByLibrary(group, readGroups);
            for (final Map.Entry<String, List<PairedReadSequence>> entry : byLibrary.entrySet()) {
                final String library = entry.getKey();
                final List<PairedReadSequence> seqs = entry.getValue();

                Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
                Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
                if (duplicationHisto == null) {
                    // Both histograms are always created together, so one null check covers both.
                    duplicationHisto = new Histogram<Integer>("duplication_group_count", library);
                    opticalHisto = new Histogram<Integer>("duplication_group_count", "optical_duplicates");
                    duplicationHistosByLibrary.put(library, duplicationHisto);
                    opticalHistosByLibrary.put(library, opticalHisto);
                }

                for (int i = 0; i < seqs.size(); ++i) {
                    final PairedReadSequence lhs = seqs.get(i);
                    if (lhs == null) {
                        // Already claimed as a duplicate of an earlier pair.
                        continue;
                    }

                    final List<PairedReadSequence> dupes = new ArrayList<PairedReadSequence>();
                    for (int j = i + 1; j < seqs.size(); ++j) {
                        final PairedReadSequence rhs = seqs.get(j);
                        if (rhs != null && this.matches(lhs, rhs, this.MAX_DIFF_RATE)) {
                            dupes.add(rhs);
                            // Null the slot so the pair is not counted again in a later set.
                            seqs.set(j, null);
                        }
                    }

                    if (dupes.isEmpty()) {
                        duplicationHisto.increment(1);
                        continue;
                    }

                    dupes.add(lhs);
                    final int duplicateCount = dupes.size();
                    duplicationHisto.increment(duplicateCount);

                    // findOpticalDuplicates is inherited; assumed to return one flag per entry of dupes.
                    final boolean[] opticalFlags = this.findOpticalDuplicates(dupes, this.OPTICAL_DUPLICATE_PIXEL_DISTANCE);
                    for (final boolean isOptical : opticalFlags) {
                        if (isOptical) {
                            opticalHisto.increment(duplicateCount);
                        }
                    }
                }
            }

            ++groupsProcessed;
            // Report progress at most once per minute.
            if (lastLogTime < System.currentTimeMillis() - 60000L) {
                this.log.info("Processed " + groupsProcessed + " groups.");
                lastLogTime = System.currentTimeMillis();
            }
        }
        iterator.close();
        sorter.cleanup();

        final MetricsFile<DuplicationMetrics, Integer> metricsFile = this.getMetricsFile();
        for (final String library : duplicationHistosByLibrary.keySet()) {
            final Histogram<Integer> duplicationHisto = duplicationHistosByLibrary.get(library);
            final Histogram<Integer> opticalHisto = opticalHistosByLibrary.get(library);
            final DuplicationMetrics metrics = new DuplicationMetrics();
            metrics.LIBRARY = library;

            for (final Integer bin : duplicationHisto.keySet()) {
                final double duplicateGroups = duplicationHisto.get(bin).getValue();
                final double opticalDuplicates = opticalHisto.get(bin) == null ? 0.0 : opticalHisto.get(bin).getValue();

                // Bins that contain a single duplicate set are treated as outliers and skipped.
                if (!(duplicateGroups > 1.0)) {
                    continue;
                }
                metrics.READ_PAIRS_EXAMINED = (long) ((double) metrics.READ_PAIRS_EXAMINED + (double) bin.intValue() * duplicateGroups);
                metrics.READ_PAIR_DUPLICATES = (long) ((double) metrics.READ_PAIR_DUPLICATES + (double) (bin.intValue() - 1) * duplicateGroups);
                metrics.READ_PAIR_OPTICAL_DUPLICATES = (long) ((double) metrics.READ_PAIR_OPTICAL_DUPLICATES + opticalDuplicates);
            }

            metrics.calculateDerivedMetrics();
            metricsFile.addMetric(metrics);
            metricsFile.addHistogram(duplicationHisto);
        }
        metricsFile.write(this.OUTPUT);
        return 0;
    }

    /**
     * Decides whether two read pairs are close enough to be called duplicates.
     *
     * Each read is compared position by position with its counterpart over the length
     * of the shorter of the two, starting at offset {@code MIN_IDENTICAL_BASES}.
     * Mismatches from both reads are pooled into a single count, and the pairs match
     * as long as that count never exceeds {@code floor((len1 + len2) * d)}.
     *
     * @param pairedReadSequence  first read pair
     * @param pairedReadSequence2 second read pair
     * @param d                   maximum tolerated mismatch fraction
     * @return {@code true} if the pooled mismatch count stays within the allowance
     */
    private boolean matches(PairedReadSequence pairedReadSequence, PairedReadSequence pairedReadSequence2, double d) {
        final int overlap1 = Math.min(pairedReadSequence.read1.length, pairedReadSequence2.read1.length);
        final int overlap2 = Math.min(pairedReadSequence.read2.length, pairedReadSequence2.read2.length);
        final int allowedMismatches = (int) Math.floor((double) (overlap1 + overlap2) * d);
        int mismatches = 0;

        int pos = this.MIN_IDENTICAL_BASES;
        while (pos < overlap1) {
            if (pairedReadSequence.read1[pos] != pairedReadSequence2.read1[pos]) {
                mismatches++;
                if (mismatches > allowedMismatches) {
                    return false;
                }
            }
            pos++;
        }

        pos = this.MIN_IDENTICAL_BASES;
        while (pos < overlap2) {
            if (pairedReadSequence.read2[pos] != pairedReadSequence2.read2[pos]) {
                mismatches++;
                if (mismatches > allowedMismatches) {
                    return false;
                }
            }
            pos++;
        }
        return true;
    }

    /**
     * Pulls the next run of read pairs that share a prefix with the pair at the head
     * of the iterator.
     *
     * The head element is always consumed and becomes the reference. Following
     * elements are consumed while their first {@code MIN_IDENTICAL_BASES} bases of both
     * read1 and read2 equal the reference's. The first element that differs is only
     * peeked at and stays in the iterator.
     *
     * @param peekableIterator iterator over read pairs; must have at least one element
     * @return the reference pair followed by every consecutive pair with the same prefixes
     */
    List<PairedReadSequence> getNextGroup(PeekableIterator<PairedReadSequence> peekableIterator) {
        final List<PairedReadSequence> group = new ArrayList<PairedReadSequence>();
        final PairedReadSequence anchor = peekableIterator.next();
        group.add(anchor);

        boolean samePrefix = true;
        while (samePrefix && peekableIterator.hasNext()) {
            final PairedReadSequence candidate = peekableIterator.peek();
            for (int i = 0; samePrefix && i < this.MIN_IDENTICAL_BASES; ++i) {
                samePrefix = anchor.read1[i] == candidate.read1[i] && anchor.read2[i] == candidate.read2[i];
            }
            if (samePrefix) {
                group.add(peekableIterator.next());
            }
        }
        return group;
    }

    /**
     * Buckets read pairs by the library of their read group.
     *
     * A pair whose read-group index is -1, or whose read group has no library set,
     * goes into the bucket named {@code "Unknown"}.
     *
     * @param list  the read pairs to partition
     * @param list2 read groups, addressed by the index stored on each read pair
     * @return a map from library name to the pairs belonging to it, in input order
     */
    Map<String, List<PairedReadSequence>> splitByLibrary(List<PairedReadSequence> list, List<SAMReadGroupRecord> list2) {
        final Map<String, List<PairedReadSequence>> byLibrary = new HashMap<String, List<PairedReadSequence>>();
        for (final PairedReadSequence seq : list) {
            // Start from the fallback name and override it only when a library is actually declared.
            String library = "Unknown";
            final short readGroupIndex = seq.getReadGroup();
            if (readGroupIndex != -1) {
                final String declared = list2.get(readGroupIndex).getLibrary();
                if (declared != null) {
                    library = declared;
                }
            }

            List<PairedReadSequence> bucket = byLibrary.get(library);
            if (bucket == null) {
                bucket = new ArrayList<PairedReadSequence>();
                byLibrary.put(library, bucket);
            }
            bucket.add(seq);
        }
        return byLibrary;
    }

    /**
     * Checks whether a single read is good enough to take part in duplicate detection.
     *
     * A read fails if it is shorter than {@code n} bases, if any of its first {@code n}
     * bases is a no-call, or if the integer mean of its base qualities is below
     * {@code n2}. A read with no quality values at all is treated as having a mean
     * quality of 0 rather than triggering a division by zero.
     *
     * @param byArray  the read bases
     * @param byArray2 the base qualities; may be empty
     * @param n        number of leading bases that must be present and called
     * @param n2       minimum acceptable mean base quality
     * @return {@code true} if the read passes every check
     */
    boolean passesQualityCheck(byte[] byArray, byte[] byArray2, int n, int n2) {
        if (byArray.length < n) {
            return false;
        }
        for (int i = 0; i < n; ++i) {
            if (SequenceUtil.isNoCall(byArray[i])) {
                return false;
            }
        }

        // No qualities available: there is nothing to average, so use a mean of 0.
        if (byArray2.length == 0) {
            return 0 >= n2;
        }

        int total = 0;
        for (final byte quality : byArray2) {
            total += quality;
        }
        // Integer division: the mean is truncated before it is compared with the threshold.
        return total / byArray2.length >= n2;
    }

    /**
     * Orders read pairs by the first {@code BASES} bases of read1, then by the first
     * {@code BASES} bases of read2, so that pairs sharing both prefixes end up next to
     * each other. Pairs with identical prefixes are ordered by identity hash code.
     */
    class PairedReadComparator
    implements Comparator<PairedReadSequence> {
        /** Prefix length compared on each read; captured from MIN_IDENTICAL_BASES at construction. */
        final int BASES;

        PairedReadComparator() {
            this.BASES = EstimateLibraryComplexity.this.MIN_IDENTICAL_BASES;
        }

        /**
         * Compares two read pairs by prefix.
         *
         * @param pairedReadSequence  left-hand read pair; both reads must hold at least {@code BASES} bases
         * @param pairedReadSequence2 right-hand read pair; same requirement
         * @return negative, zero or positive as the left pair sorts before, with or after the right pair
         */
        @Override
        public int compare(PairedReadSequence pairedReadSequence, PairedReadSequence pairedReadSequence2) {
            // Byte differences always fit in an int, so plain subtraction is safe here.
            for (int i = 0; i < this.BASES; ++i) {
                final int diff = pairedReadSequence.read1[i] - pairedReadSequence2.read1[i];
                if (diff != 0) {
                    return diff;
                }
            }
            for (int i = 0; i < this.BASES; ++i) {
                final int diff = pairedReadSequence.read2[i] - pairedReadSequence2.read2[i];
                if (diff != 0) {
                    return diff;
                }
            }

            // Tie-break on identity hash. Compare explicitly instead of subtracting:
            // the difference of two arbitrary ints can overflow and flip the sign.
            final int leftHash = System.identityHashCode(pairedReadSequence);
            final int rightHash = System.identityHashCode(pairedReadSequence2);
            if (leftHash < rightHash) {
                return -1;
            }
            return leftHash > rightHash ? 1 : 0;
        }
    }

    /**
     * Binary codec that {@link SortingCollection} uses to write {@link PairedReadSequence}
     * records to a stream and read them back.
     *
     * Record layout, in order: readGroup, tile, x, y (each a 16-bit short), then the
     * length of read1 as an int followed by its bytes, then the same for read2. The
     * {@code qualityOk} flag is not serialized.
     */
    static class PairedReadCodec
    implements SortingCollection.Codec<PairedReadSequence> {
        private DataOutputStream out;
        private DataInputStream in;

        PairedReadCodec() {
        }

        /** Wraps the given stream as the destination for subsequent {@link #encode} calls. */
        @Override
        public void setOutputStream(OutputStream outputStream) {
            this.out = new DataOutputStream(outputStream);
        }

        /** Wraps the given stream as the source for subsequent {@link #decode} calls. */
        @Override
        public void setInputStream(InputStream inputStream) {
            this.in = new DataInputStream(inputStream);
        }

        /**
         * Writes one read pair to the output stream.
         *
         * @param pairedReadSequence the pair to write; read1 and read2 must be non-null
         * @throws PicardException if the underlying stream reports an I/O error
         */
        @Override
        public void encode(PairedReadSequence pairedReadSequence) {
            try {
                this.out.writeShort(pairedReadSequence.readGroup);
                this.out.writeShort(pairedReadSequence.tile);
                this.out.writeShort(pairedReadSequence.x);
                this.out.writeShort(pairedReadSequence.y);
                this.out.writeInt(pairedReadSequence.read1.length);
                this.out.write(pairedReadSequence.read1);
                this.out.writeInt(pairedReadSequence.read2.length);
                this.out.write(pairedReadSequence.read2);
            }
            catch (IOException iOException) {
                throw new PicardException("Error write out read pair.", iOException);
            }
        }

        /**
         * Reads the next read pair from the input stream.
         *
         * @return the decoded pair, or {@code null} if the stream ends where the next record would start
         * @throws PicardException if the stream ends inside a record or another I/O error occurs
         */
        @Override
        public PairedReadSequence decode() {
            try {
                final PairedReadSequence pairedReadSequence = new PairedReadSequence();
                try {
                    pairedReadSequence.readGroup = this.in.readShort();
                }
                catch (EOFException eOFException) {
                    // End of stream at a record boundary: there are no more records.
                    return null;
                }
                pairedReadSequence.tile = this.in.readShort();
                pairedReadSequence.x = this.in.readShort();
                pairedReadSequence.y = this.in.readShort();
                pairedReadSequence.read1 = this.readLengthPrefixedBytes();
                pairedReadSequence.read2 = this.readLengthPrefixedBytes();
                return pairedReadSequence;
            }
            catch (IOException iOException) {
                throw new PicardException("Exception reading read pair.", iOException);
            }
        }

        /**
         * Reads an int length followed by exactly that many bytes.
         *
         * Uses {@code readFully} because a single {@code read(byte[])} call may legally
         * return fewer bytes than requested even when more data follows.
         */
        private byte[] readLengthPrefixedBytes() throws IOException {
            final int length = this.in.readInt();
            final byte[] bytes = new byte[length];
            try {
                this.in.readFully(bytes);
            }
            catch (EOFException eOFException) {
                throw new PicardException("Could not read " + length + " bytes from temporary file.", eOFException);
            }
            return bytes;
        }

        /** Returns a new codec with no streams attached. */
        @Override
        public SortingCollection.Codec<PairedReadSequence> clone() {
            return new PairedReadCodec();
        }
    }

    /**
     * Mutable holder for the two reads of a pair together with the read-group index and
     * the tile/x/y location fields required by
     * {@link AbstractDuplicateFindingAlgorithm.PhysicalLocation}.
     *
     * All short fields start at -1; {@code splitByLibrary} treats a read group of -1 as
     * "no read group".
     */
    static class PairedReadSequence
    implements AbstractDuplicateFindingAlgorithm.PhysicalLocation {
        /** Per-record size in bytes; doWork() divides the maximum heap by it to size the in-memory buffer. */
        static int size_in_bytes = 308;

        /** Bases of the first-of-pair read; null until that mate has been seen. */
        byte[] read1;
        /** Bases of the second-of-pair read; null until that mate has been seen. */
        byte[] read2;
        /** Stays true only while every mate seen so far has passed the quality check. */
        boolean qualityOk = true;

        short readGroup = -1;
        short tile = -1;
        short x = -1;
        short y = -1;

        PairedReadSequence() {
        }

        /** Returns the stored read-group index, or -1 if none has been set. */
        @Override
        public short getReadGroup() {
            return readGroup;
        }

        /** Stores the read-group index. */
        @Override
        public void setReadGroup(short s) {
            readGroup = s;
        }

        /** Returns the stored tile value, or -1 if none has been set. */
        @Override
        public short getTile() {
            return tile;
        }

        /** Stores the tile value. */
        @Override
        public void setTile(short s) {
            tile = s;
        }

        /** Returns the stored x value, or -1 if none has been set. */
        @Override
        public short getX() {
            return x;
        }

        /** Stores the x value. */
        @Override
        public void setX(short s) {
            x = s;
        }

        /** Returns the stored y value, or -1 if none has been set. */
        @Override
        public short getY() {
            return y;
        }

        /** Stores the y value. */
        @Override
        public void setY(short s) {
            y = s;
        }
    }
}

