com.slytechs.capture.file.indexer.SoftRegionIndexer.java Source code

Java tutorial

Introduction

Here is the source code for com.slytechs.capture.file.indexer.SoftRegionIndexer.java

Source

/**
 * Copyright (C) 2007 Sly Technologies, Inc. This library is free software; you
 * can redistribute it and/or modify it under the terms of the GNU Lesser
 * General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version. This
 * library is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details. You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
package com.slytechs.capture.file.indexer;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jnetstream.capture.file.HeaderReader;

import com.slytechs.capture.file.editor.BasicRecordIterator;
import com.slytechs.capture.file.editor.PartialLoader;
import com.slytechs.utils.Size;
import com.slytechs.utils.event.ProgressTask;
import com.slytechs.utils.event.SuperProgressTask;
import com.slytechs.utils.io.IORuntimeException;
import com.slytechs.utils.region.FlexRegion;

/**
 * Region indexer that scans the records supplied by a {@link PartialLoader}
 * and stores each record's regional position in one or more
 * {@link IndexTable}s. The tables allow a record index to be mapped to its
 * regional position ({@link #mapIndexToPositionRegional(int)}) and a regional
 * position to be mapped back to a record index
 * ({@link #mapSRegionalToTRegional(long)}).
 * <p>
 * When the computed <em>factor</em> is {@code 0} all positions live in a single
 * table; otherwise every table (except possibly the last one) holds exactly
 * {@code factor} positions.
 * </p>
 * 
 * @author Mark Bednarczyk
 * @author Sly Technologies, Inc.
 */
public class SoftRegionIndexer implements RegionIndexer {
    private static final Log logger = LogFactory.getLog(SoftRegionIndexer.class);

    /** Number of records sampled for the average size of a large file. */
    private static final int LARGE_FILE_SAMPLE = 3000;

    /** Loader length (in bytes) above which a file is treated as large. */
    private static final long LARGE_FILE_THRESHOLD = 10 * 1024 * 1024;

    /** Divisor applied to the estimated record count to obtain the factor. */
    private static final long MAX_HARD_RECORDS = 10000;

    /** Smallest non-zero factor that is ever used. */
    private static final int MIN_FACTOR_SIZE = 500;

    /** Number of records sampled for the average size of a small file. */
    private static final int SMALL_FILE_SAMPLE = 300;

    /** Records per index table; {@code 0} means a single table holds them all. */
    private int factor = 0;

    /** Total number of records indexed. */
    private int length = 0;

    private final HeaderReader lengthGetter;

    private SuperProgressTask task;

    /*
     * TODO add implementation of partial region indexer
     */
    @SuppressWarnings("unused")
    private final PartialLoader loader;

    private IndexTable[] table;

    private ProgressTask scanTask;

    /**
     * Creates an indexer from a loader. Currently this simply delegates to
     * {@link #SoftRegionIndexer(PartialLoader)}.
     * 
     * @param region
     *          region being indexed; currently unused
     * @param length
     *          number of records in the region; currently unused
     * @param sData
     *          loader that supplies the records to index
     * @throws IOException
     *           if scanning the loader fails
     */
    public SoftRegionIndexer(final FlexRegion<RegionIndexer> region, final long length, final PartialLoader sData)
            throws IOException {

        this(sData);

        /*
         * TODO: build the index by walking the segments of the supplied region
         * (mapping each global index through the segment's own indexer) instead
         * of rescanning the loader. Until then region and length are ignored.
         */
    }

    /**
     * Indexes the entire unsegmented region. The region can not be segmented.
     * A {@code null} loader produces an empty indexer with no records.
     * 
     * @param loader
     *          loader that supplies the records to index, may be {@code null}
     * @throws IOException
     *           if scanning the loader fails
     */
    public SoftRegionIndexer(final PartialLoader loader) throws IOException {

        this.loader = loader;
        this.task = new SuperProgressTask("indexer");

        /*
         * The null check has to come before anything dereferences the loader,
         * otherwise it can never take effect.
         */
        if (loader == null) {
            this.lengthGetter = null;
            this.table = new IndexTable[0];
            return;
        }

        this.scanTask = this.task.addTask("scan file for records", loader.getLength());

        this.calculateFactor(loader);

        this.lengthGetter = loader.getLengthGetter();
        this.table = this.createIndexTableFromLoader(loader, this.factor);

        logger.trace("table=" + this.table.length);
    }

    /**
     * Calculates the soft index factor, i.e. how many record positions are
     * grouped into each index table. The average record size is estimated from
     * a sample of the first records (a bigger sample for files above
     * {@link #LARGE_FILE_THRESHOLD}), which gives an estimated record count that
     * is then divided by {@link #MAX_HARD_RECORDS}.
     * 
     * @param loader
     *          loader whose records are sampled
     * @throws IOException
     *           if reading the sample fails
     */
    private void calculateFactor(final PartialLoader loader) throws IOException {
        final long size = loader.getLength();
        final int sampleSize = (size > SoftRegionIndexer.LARGE_FILE_THRESHOLD ? SoftRegionIndexer.LARGE_FILE_SAMPLE
                : SoftRegionIndexer.SMALL_FILE_SAMPLE);

        final int ave = this.takeAverageSample(loader, sampleSize);

        /*
         * An empty loader (or records reporting zero length) yields no usable
         * average; fall back to a factor of 0 instead of dividing by zero.
         */
        final long estPacketCount = (ave > 0 ? size / ave : 0);
        this.factor = (int) (estPacketCount / SoftRegionIndexer.MAX_HARD_RECORDS);

        /*
         * Make sure we use a reasonable minimum to be efficient
         */
        if ((this.factor != 0) && (this.factor < SoftRegionIndexer.MIN_FACTOR_SIZE)) {
            this.factor = SoftRegionIndexer.MIN_FACTOR_SIZE;
        }

        logger.trace("size=" + size + ", ave=" + ave + ", est=" + estPacketCount + ", factor=" + this.factor);
    }

    /**
     * Method will iterate over the entire region full of records and index each
     * record's position in an array. Record positions in regional address space
     * are always constant, therefore once the region is indexed once, we don't
     * have to worry about positions any more.
     * <p>
     * As a side effect {@link #length} is incremented once per record and the
     * scan progress task is updated and finished.
     * </p>
     * 
     * @param loader
     *          loader that supplies the records
     * @param factor
     *          number of positions per table, or {@code 0} to put all positions
     *          into a single table
     * @return the index tables in record order, never {@code null}
     * @throws IOException
     *           if iterating over the records fails
     */
    private IndexTable[] createIndexTableFromLoader(final PartialLoader loader, final int factor)
            throws IOException {

        final BasicRecordIterator iterator = new BasicRecordIterator(loader, this.lengthGetter);
        final int capacity = (factor == 0 ? 10000 : factor);

        final List<Long> temp = new ArrayList<Long>(capacity); // Rough estimate
        final List<IndexTable> it = new ArrayList<IndexTable>(100);

        long next = 0;
        long previous = 0;
        while (iterator.hasNext()) {
            final long regional = iterator.getPosition();
            temp.add(regional);
            iterator.skip();

            this.length++;

            /* Every full group of "factor" positions becomes its own table */
            if ((factor != 0) && (this.length % factor == 0)) {
                it.add(new SoftTable(temp, loader));
                temp.clear();
            }

            /* Report progress only once the position passes the "next" mark */
            if (regional > next) {
                scanTask.update(regional - previous);
                next = regional + 200 * Size.OneMeg.bytes();

                previous = regional;
            }
        }

        scanTask.finish();

        /*
         * Whatever is left over forms the last, possibly shorter, table.
         * NOTE(review): the choice between HardTable and SoftTable here depends
         * only on "factor > 2" - confirm this is the intended condition.
         */
        if (!temp.isEmpty()) {

            if (factor > 2) {
                it.add(new HardTable(temp));

            } else {
                it.add(new SoftTable(temp, loader));
            }
        }

        /*
         * Now lets turn the list into more efficient array since regions are not
         * mutable we can do that.
         */
        return it.toArray(new IndexTable[it.size()]);
    }

    /**
     * @return number of records that were indexed
     */
    public long getLength() {
        return this.length;
    }

    /**
     * Maps a record index to the record's regional position.
     * 
     * @param regional
     *          zero based record index
     * @return position of the record as stored in the index table
     * @throws IndexOutOfBoundsException
     *           if the index is negative or not less than {@link #getLength()}
     * @throws IORuntimeException
     *           if the index table fails with an {@link IOException}
     */
    public long mapIndexToPositionRegional(final int regional) {
        if ((regional < 0) || (regional >= this.length)) {
            throw new IndexOutOfBoundsException(
                    "Regional index [" + regional + "] is out of bounds [0 - " + (this.length - 1) + "].");
        }

        /* Which table, and which slot within that table */
        final int it = (this.factor == 0 ? 0 : regional / this.factor);
        final int index = (this.factor == 0 ? regional : regional % this.factor);

        try {
            return this.table[it].get(index);

        } catch (final IOException e) {
            throw new IORuntimeException(e);
        }
    }

    /**
     * Searches the index tables, in order, for the given regional position and
     * maps it back to a record index.
     * 
     * @param sRegional
     *          regional position to look for
     * @return record index of the first table entry that matches, or {@code -1}
     *         if no table contains the position
     * @throws IORuntimeException
     *           if an index table fails with an {@link IOException}
     */
    public long mapSRegionalToTRegional(final long sRegional) {
        try {
            for (int i = 0; i < this.table.length; i++) {
                final long local = this.table[i].search(sRegional);

                /*
                 * Stop at the first hit, otherwise the search of the following
                 * table would overwrite the result.
                 */
                if (local != -1) {
                    return local + (long) i * this.factor;
                }
            }
        } catch (final IOException e) {
            throw new IORuntimeException(e);
        }

        return -1;
    }

    /**
     * Takes a certain sample packet average by reading the first max number of
     * packets.
     * 
     * @param loader
     *          loader whose records are sampled
     * @param max
     *          maximum number of records to sample
     * @return average of the lengths reported by the loader's length getter, or
     *         {@code 0} if there were no records to sample
     * @throws IOException
     *           if reading a record fails
     */
    private int takeAverageSample(final PartialLoader loader, final int max) throws IOException {
        final HeaderReader reader = loader.getLengthGetter();
        final BasicRecordIterator iterator = new BasicRecordIterator(loader, reader);

        int count = 0;
        long total = 0;
        while (iterator.hasNext() && (count < max)) {
            final ByteBuffer b = iterator.next();
            total += reader.readLength(b);

            count++;
        }

        /* Nothing sampled: avoid dividing by zero */
        if (count == 0) {
            return 0;
        }

        return (int) (total / count);
    }

    @Override
    public String toString() {
        return "Idx";
    }

    /**
     * Asks every index table that overlaps the requested record range to keep
     * its data in memory and collects what the tables return.
     * 
     * @param start
     *          index of the first record in the range
     * @param count
     *          number of records in the range; truncated so that the range does
     *          not extend past the last indexed record
     * @return array with one element per table, holding the value returned by
     *         that table's {@code keepInMemory} call
     * @throws IOException
     *           if a table fails while being locked into memory
     * @see com.slytechs.capture.file.indexer.RegionIndexer#keepInMemory(long,
     *      long)
     */
    public Object keepInMemory(long start, long count) throws IOException {
        count = (start + count <= this.length ? count : this.length - start);

        task = new SuperProgressTask("indexer");

        /*
         * Divide as long first and narrow afterwards. The exclusive end is
         * clamped to the number of tables that actually exist, since the "+ 1"
         * otherwise points one past the last table whenever the range ends on a
         * table boundary or when there is only a single table (factor == 0).
         */
        final int first = (this.factor == 0 ? 0 : (int) (start / this.factor));
        final int lastExclusive = (this.factor == 0 ? 1 : (int) ((start + count) / this.factor) + 1);
        final int end = Math.min(lastExclusive, this.table.length);

        /* Progress is measured in tables, one unit per table processed */
        scanTask = task.addTask("locking indexes into memory", (long) Math.max(0, end - first));

        final List<Object> list = new ArrayList<Object>(10);

        for (int i = first; i < end; i++) {
            /*
             * NOTE(review): every table is called with (0, 1) regardless of the
             * requested range - confirm against IndexTable.keepInMemory.
             */
            list.add(table[i].keepInMemory(0, 1));

            scanTask.update(1);
        }

        scanTask.finish();

        return list.toArray();
    }

}