Java tutorial
/******************************************************************************* * Copyright (C) 2011 Atlas of Living Australia * All Rights Reserved. * * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. ******************************************************************************/ package au.org.ala.delta.io; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import org.apache.commons.lang.StringUtils; /** * Provides access to the contents of a file formatted for use by the Intkey and * Key programs. It currently mostly only supports sequential writes. */ public class BinaryKeyFile extends BinFile { // TODO - I nicked these from the Intkey project Constants class. public static final int RECORD_LENGTH_INTEGERS = 32; public static final int RECORD_LENGTH_BYTES = 128; public static final int SIZE_INT_IN_BYTES = Integer.SIZE / Byte.SIZE; public static final int SIZE_FLOAT_IN_BYTES = Float.SIZE / Byte.SIZE; public static final int DATASET_MAJOR_VERSION = 5; public static final int DATASET_MINOR_VERSION = 2; private Set<Integer> _occupiedRecords; public BinaryKeyFile(String fileName, BinFileMode mode) { super(fileName, mode); _occupiedRecords = new HashSet<Integer>(); } /** * Writes the supplied array of ints to the record identified by * <code>recordNumber</code>. If the number of values is larger than will * fit into a single record (>128) the write will continue into the next * record. * * @param recordNumber * the record number to write to. * @param values * the data to write to the record. * @return the number of records written. */ public int writeToRecord(int recordNumber, int[] values) { checkForOverwrite(recordNumber, 0, values.length * SIZE_INT_IN_BYTES); // Zero pad the values out to fill a full record. if (values.length % RECORD_LENGTH_INTEGERS != 0) { int newLength = (values.length / RECORD_LENGTH_INTEGERS + 1) * RECORD_LENGTH_INTEGERS; int[] newValues = new int[newLength]; Arrays.fill(newValues, 0); System.arraycopy(values, 0, newValues, 0, values.length); values = newValues; } seekToRecord(recordNumber); writeInts(values); return values.length / RECORD_LENGTH_INTEGERS; } public int writeToRecord(int recordNumber, int offset, byte[] values) { int numRecords = checkForOverwrite(recordNumber, offset, values.length); if (values.length % RECORD_LENGTH_BYTES != 0) { int newLength = (values.length / RECORD_LENGTH_BYTES + 1) * RECORD_LENGTH_BYTES; byte[] newValues = new byte[newLength]; Arrays.fill(newValues, (byte) 0); System.arraycopy(values, 0, newValues, 0, values.length); values = newValues; } seekToRecord(recordNumber, offset); write(values); return numRecords; } private int checkForOverwrite(int recordNumber, int offset, int numBytes) { int numRecords = (offset + numBytes / RECORD_LENGTH_BYTES); if ((offset + numBytes) % RECORD_LENGTH_BYTES > 0) { numRecords++; } for (int i = recordNumber; i < recordNumber + numRecords; i++) { if (_occupiedRecords.contains(i)) { throw new RuntimeException( "Writing " + (numBytes + offset) + " bytes to a record will overwrite the next record"); } _occupiedRecords.add(i); } return numRecords; } public void writeToRecord(int recordNumber, int value) { seekToRecord(recordNumber); writeInts(new int[] { value }); } public int writeToRecord(int recordNumber, String value) { return writeToRecord(recordNumber, 0, value); } public void writeToRecord(int recordNumber, byte value) { writeToRecord(recordNumber, 0, new byte[] { value }); } /** * Writes to consecutive records the length of the supplied string, then, in * the following record, the String (encoded using default BinFileEncoding) * * @param recordNumber * the record number to write the length to. The string itself * will be written to record recordNumber+1 * @param value * the string to write. */ public int writeStringWithLength(int recordNumber, String value) { if (value.length() == 0) { throw new RuntimeException("Cannot write zero length strings."); } writeToRecord(recordNumber, value.length()); int numRecords = writeToRecord(recordNumber + 1, 0, value); return numRecords + 1; } /** * Writes a single int at the supplied recordNumber, which is the record * number that actually contains the data. * The data is then written as writeStringWithLength(). */ public int writeIndirectStringWithLength(int recordNumber, String value) { if (value.length() == 0) { throw new RuntimeException("Cannot write zero length strings."); } int next = recordNumber + 1; writeToRecord(recordNumber, next); return writeStringWithLength(next, value) + 1; } /** * Writes a String to the identified record. Optionally w * * @param recordNumber * the record to write to. * @param offset * @param value * @param writeLength */ public int writeToRecord(int recordNumber, int offset, String value) { byte[] notesBytes = BinFileEncoding.encode(value); return writeToRecord(recordNumber, offset, notesBytes); } public int writeToRecord(int recordNumber, List<Integer> values) { ByteBuffer bytes = ByteBuffer.allocate(values.size() * SIZE_INT_IN_BYTES); bytes.order(ByteOrder.LITTLE_ENDIAN); for (int value : values) { bytes.putInt(value); } return writeToRecord(recordNumber, 0, bytes.array()); } /** * Designed to allow headers and index records to be overwritten. * * @param recordNumber * the first (of possibily many, depending on the number of * values) record to be overwritten. * @param values * the values to write, starting at record, recordNumber.. */ public void overwriteRecord(int recordNumber, List<Integer> values) { if (!_occupiedRecords.contains(recordNumber)) { throw new IllegalArgumentException("Record " + recordNumber + " has not been allocated."); } ByteBuffer bytes = ByteBuffer.allocate(values.size() * SIZE_INT_IN_BYTES); bytes.order(ByteOrder.LITTLE_ENDIAN); for (int value : values) { bytes.putInt(value); } seekToRecord(recordNumber, 0); write(bytes.array()); } public int writeBooleansToRecord(int recordNumber, List<Boolean> values) { ByteBuffer bytes = ByteBuffer.allocate(values.size() * SIZE_INT_IN_BYTES); bytes.order(ByteOrder.LITTLE_ENDIAN); for (boolean value : values) { bytes.putInt(value ? 1 : 0); } return writeToRecord(recordNumber, 0, bytes.array()); } public int writeFloatsToRecord(int recordNumber, List<Float> values) { ByteBuffer bytes = ByteBuffer.allocate(values.size() * SIZE_INT_IN_BYTES); bytes.order(ByteOrder.LITTLE_ENDIAN); for (float value : values) { bytes.putFloat(value); } return writeToRecord(recordNumber, 0, bytes.array()); } public int nextAvailableRecord() { int max = 0; if (_occupiedRecords.size() > 0) { max = Collections.max(_occupiedRecords); } seekToRecord(max + 1); return max + 1; } // Note that records are 1 indexed. public void seekToRecord(int recordNum) { seekToRecord(recordNum, 0); } public void seekToRecord(int recordNum, int offset) { seek(recordOffset(recordNum) + offset); } protected int getRecordNumber(int indexRecordNum, int offset) { int pos = recordOffset(indexRecordNum) + offset; seek(pos); return readInt(); } private int recordOffset(int recordNum) { return (recordNum - 1) * RECORD_LENGTH_BYTES; } public List<Integer> readIntegerList(int recordNum, int numInts) { seek(recordOffset(recordNum)); ByteBuffer bb = readByteBuffer(numInts * SIZE_INT_IN_BYTES); List<Integer> retList = new ArrayList<Integer>(); for (int i = 0; i < numInts; i++) { retList.add(bb.getInt()); } return retList; } public List<Float> readFloatList(int recordNum, int numFloats) { seek(recordOffset(recordNum)); ByteBuffer bb = readByteBuffer(numFloats * SIZE_INT_IN_BYTES); List<Float> retList = new ArrayList<Float>(); for (int i = 0; i < numFloats; i++) { retList.add(bb.getFloat()); } return retList; } public String readString(int recordNum, int numChars) { seek(recordOffset(recordNum)); ByteBuffer bb = readByteBuffer(numChars); return BinFileEncoding.decode(bb.array()); } public ByteBuffer readBytes(int recordNum, int numBytes) { seek(recordOffset(recordNum)); ByteBuffer bb = readByteBuffer(numBytes); return bb; } /** * Writes: 1) An index record that contains one entry per supplied value * which indicates which record that value is written to. 2) For each value: * 2.1) A record containing the length of the value. 2.2) The subsequent * record(s) containing the value. * * @param indexRecordNum * the record number to contain the index. * @param values * the values to write. */ public void writeIndexedValues(int indexRecordNum, String[] values) { writeIndexedValuesWithGap(indexRecordNum, 0, values); } public int writeIndexedValuesWithGap(int indexRecordNum, int gap, String[] values) { int[] indicies = new int[values.length]; Arrays.fill(indicies, 0); int recordNum = indexRecordNum + (int) Math.floor(indicies.length / RECORD_LENGTH_INTEGERS) + 1; int gapRecord = recordNum; recordNum += gap; for (int i = 0; i < values.length; i++) { if (StringUtils.isNotEmpty(values[i])) { indicies[i] = recordNum; recordNum += writeStringWithLength(recordNum, values[i]); } } writeToRecord(indexRecordNum, indicies); return gapRecord; } /** * Writes the supplied values in the form: 1) The record at startRecord will * contain values.length integers, each of which is the length of the value. * 2) The following record will contain the text from values, concatenated * into a single continuous string. * * @param startRecord * the record number for the lengths. * @param values * the values to write. */ public int writeAsContinousString(int startRecord, String[] values) { int[] lengths = new int[values.length]; StringBuilder text = new StringBuilder(); for (int i = 0; i < values.length; i++) { lengths[i] = values[i].length(); text.append(values[i]); } int numRecords = writeToRecord(startRecord, lengths); numRecords += writeToRecord(startRecord + numRecords, text.toString()); return numRecords; } protected List<Integer> bitSetToInts(BitSet set, int numValues) { List<Integer> values = new ArrayList<Integer>(); int i = 0; while (i < numValues) { int value = 0; while (i < numValues && (i - 32 * values.size()) / 32 == 0) { if (set.get(i)) { value |= 1 << i % 32; } i++; } values.add(value); } return values; } protected void writeStringsWithOffsetsToRecord(int startRecord, List<String> descriptions) { int[] offsets = new int[descriptions.size() + 1]; StringBuilder buffer = new StringBuilder(); // FORTRAN arrays are "1" indexed so any location pointers are off by 1. offsets[0] = 1; for (int i = 0; i < descriptions.size(); i++) { offsets[i + 1] = offsets[i] + descriptions.get(i).length(); buffer.append(descriptions.get(i)); } startRecord += writeToRecord(startRecord, offsets); writeToRecord(startRecord, buffer.toString()); } }