Java tutorial
/** * Copyright (C) 2007 Sly Technologies, Inc. This library is free software; you * can redistribute it and/or modify it under the terms of the GNU Lesser * General Public License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. This * library is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package com.slytechs.capture.file.indexer; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jnetstream.capture.file.HeaderReader; import com.slytechs.capture.file.editor.BasicRecordIterator; import com.slytechs.capture.file.editor.PartialLoader; import com.slytechs.utils.Size; import com.slytechs.utils.event.ProgressTask; import com.slytechs.utils.event.SuperProgressTask; import com.slytechs.utils.io.IORuntimeException; import com.slytechs.utils.region.FlexRegion; /** * @author Mark Bednarczyk * @author Sly Technologies, Inc. */ public class SoftRegionIndexer implements RegionIndexer { private static final Log logger = LogFactory.getLog(SoftRegionIndexer.class); private static final int LARGE_FILE_SAMPLE = 3000; private static final long LARGE_FILE_THRESHOLD = 10 * 1024 * 1024; private static final long MAX_HARD_RECORDS = 10000; private static final int MIN_FACTOR_SIZE = 500; private static final int SMALL_FILE_SAMPE = 300; private int factor = 0; private int length = 0; private final HeaderReader lengthGetter; private SuperProgressTask task; /* * TODO add implementation of partial region indexer */ @SuppressWarnings("unused") private final PartialLoader loader; private IndexTable[] table; private ProgressTask scanTask; /** * @param data * @param name * @throws IOException */ public SoftRegionIndexer(final FlexRegion<RegionIndexer> region, final long length, final PartialLoader sData) throws IOException { this(sData); /* * Just in case we have use soft indexes which will require rescanning using * the partial loader */ // this.loader = sData; // this.lengthGetter = sData.getLengthGetter(); // // RegionSegment<RegionIndexer> segment = region.getSegment(0); // RegionSegment linked = (RegionSegment) segment.getLinkedSegment(); // RegionIndexer indexer = segment.getData(); // // for (int i = 0; i < length; i++) { // // if (segment.checkBoundsGlobal(i) == false) { // segment = region.getSegment(i); // linked = (RegionSegment) segment.getLinkedSegment(); // indexer = segment.getData(); // } // // final long regional = segment.mapGlobalToRegional(i); // // final long position = linked.mapRegionalToGlobal(indexer // .mapIndexToPositionRegional((int) regional)); // // } } /** * Indexes the entire unsegmented region. The region can not be segmented * * @param target * @throws IOException */ public SoftRegionIndexer(final PartialLoader loader) throws IOException { task = new SuperProgressTask("indexer"); scanTask = task.addTask("scan file for records", loader.getLength()); this.loader = loader; this.calculateFactor(loader); if (loader != null) { this.lengthGetter = loader.getLengthGetter(); this.table = this.createIndexTableFromLoader(loader, this.factor); logger.trace("table=%d" + this.table.length); } else { this.lengthGetter = null; } } private void calculateFactor(final PartialLoader loader) throws IOException { /* * Calculate the soft index factor which determines how many soft indexes we * keep per every hard index. For very large files take a sample of packets * and calculate average size. For smaller files simply assume the minimum * of 30 bytes per packet as average. */ final int ave = (loader.getLength() > SoftRegionIndexer.LARGE_FILE_THRESHOLD ? this.takeAverageSample(loader, SoftRegionIndexer.LARGE_FILE_SAMPLE) : this.takeAverageSample(loader, SoftRegionIndexer.SMALL_FILE_SAMPE)); final long estPacketCount = loader.getLength() / ave; this.factor = (int) (estPacketCount / SoftRegionIndexer.MAX_HARD_RECORDS); /* * Make sure we use a reasonable minimum to be efficient */ if ((this.factor != 0) && (this.factor < SoftRegionIndexer.MIN_FACTOR_SIZE)) { this.factor = SoftRegionIndexer.MIN_FACTOR_SIZE; } logger.trace("size=" + loader.getLength() + ", ave=" + ave + ", est=" + estPacketCount + ", factor=" + this.factor); } /** * Method will iterate over the entire region full of records and index each * record's position in an array. Record positions in regional address space * are always constant, therefore once the region is indexed once, we don't * have to worry about positions any more. * * @param loader * @param factor * TODO * @return * @throws IOException */ private IndexTable[] createIndexTableFromLoader(final PartialLoader loader, final int factor) throws IOException { final BasicRecordIterator iterator = new BasicRecordIterator(loader, this.lengthGetter); final int capacity = (factor == 0 ? 10000 : factor); final List<Long> temp = new ArrayList<Long>(capacity); // Rough estimate final List<IndexTable> it = new ArrayList<IndexTable>(100); long next = 0; long previous = 0; while (iterator.hasNext()) { final long regional = iterator.getPosition(); temp.add(regional); iterator.skip(); this.length++; if ((factor != 0) && (this.length % factor == 0)) { it.add(new SoftTable(temp, loader)); temp.clear(); } if (regional > next) { scanTask.update(regional - previous); next = regional + 200 * Size.OneMeg.bytes(); previous = regional; } } scanTask.finish(); if (temp.isEmpty() == false) { if (factor > 2) { it.add(new HardTable(temp)); } else { it.add(new SoftTable(temp, loader)); } } /* * Now lets turn the list into more efficient array since regions are not * mutable we can do that. */ final IndexTable[] array = it.toArray(new IndexTable[it.size()]); return array; } /** * @return */ public long getLength() { return this.length; } public long mapIndexToPositionRegional(final int regional) { if ((regional < 0) || (regional >= this.length)) { throw new IndexOutOfBoundsException( "Regional index [" + regional + "] is out of bounds [0 - " + (this.length - 1) + "]."); } final int it = (this.factor == 0 ? 0 : regional / this.factor); final int index = (this.factor == 0 ? regional : regional % this.factor); try { final long position = this.table[it].get(index); return position; } catch (final IOException e) { throw new IORuntimeException(e); } } /** * @param start * @return */ public long mapSRegionalToTRegional(final long sRegional) { long r = -1; try { for (int i = 0; i < this.table.length; i++) { if ((r = this.table[i].search(sRegional)) != -1) { r += i * this.factor; } } } catch (final IOException e) { throw new IORuntimeException(e); } return r; } /** * Takes a certain sample packet average by reading the first max number of * packets. * * @param loader * @param max * @return * @throws IOException */ private int takeAverageSample(final PartialLoader loader, final int max) throws IOException { final BasicRecordIterator iterator = new BasicRecordIterator(loader, loader.getLengthGetter()); int count = 0; int total = 0; while (iterator.hasNext() && (count < max)) { final ByteBuffer b = iterator.next(); total += loader.getLengthGetter().readLength(b); count++; } final int ave = total / count; return ave; } @Override public String toString() { final StringBuilder b = new StringBuilder("Idx"); return b.toString(); } /* * (non-Javadoc) * * @see com.slytechs.capture.file.indexer.RegionIndexer#keepInMemory(long, * long) */ public Object keepInMemory(long start, long count) throws IOException { count = (start + count <= this.length ? count : this.length - start); task = new SuperProgressTask("indexer"); final int first = (this.factor == 0 ? 0 : (int) start / this.factor); final int end = (this.factor == 0 ? 1 : (int) (start + count) / this.factor) + 1; scanTask = task.addTask("locking indexes into memory", end - start); final List<Object> list = new ArrayList<Object>(10); for (int i = first; i < end; i++) { list.add(table[i].keepInMemory(0, 1)); if (i % 1000 == 0) { scanTask.update(1000); } } scanTask.finish(); return list.toArray(); } }