au.org.ala.delta.intkey.model.IntkeyDatasetFileReader.java Source code

Introduction

Here is the source code for au.org.ala.delta.intkey.model.IntkeyDatasetFileReader.java
Source

/*******************************************************************************
 * Copyright (C) 2011 Atlas of Living Australia
 * All Rights Reserved.
 * 
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 * 
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 ******************************************************************************/
package au.org.ala.delta.intkey.model;

import au.org.ala.delta.directives.validation.DirectiveException;
import au.org.ala.delta.io.BinFile;
import au.org.ala.delta.io.BinFileEncoding;
import au.org.ala.delta.io.BinFileMode;
import au.org.ala.delta.model.Attribute;
import au.org.ala.delta.model.Character;
import au.org.ala.delta.model.CharacterDependency;
import au.org.ala.delta.model.CharacterFactory;
import au.org.ala.delta.model.CharacterType;
import au.org.ala.delta.model.DefaultDataSetFactory;
import au.org.ala.delta.model.DeltaDataSetFactory;
import au.org.ala.delta.model.IntegerAttribute;
import au.org.ala.delta.model.IntegerCharacter;
import au.org.ala.delta.model.Item;
import au.org.ala.delta.model.MultiStateAttribute;
import au.org.ala.delta.model.MultiStateCharacter;
import au.org.ala.delta.model.RealAttribute;
import au.org.ala.delta.model.RealCharacter;
import au.org.ala.delta.model.TextAttribute;
import au.org.ala.delta.model.TextCharacter;
import au.org.ala.delta.model.image.Image;
import au.org.ala.delta.model.image.ImageOverlay;
import au.org.ala.delta.model.image.ImageOverlayParser;
import au.org.ala.delta.model.image.ImageSettings;
import au.org.ala.delta.model.image.ImageSettings.FontInfo;
import au.org.ala.delta.model.image.ImageType;
import au.org.ala.delta.model.impl.CharacterData;
import au.org.ala.delta.model.impl.DefaultCharacterData;
import au.org.ala.delta.model.impl.DefaultImageData;
import au.org.ala.delta.model.impl.ItemData;
import au.org.ala.delta.model.impl.SimpleAttributeData;
import au.org.ala.delta.util.Pair;
import au.org.ala.delta.util.Utils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.FloatRange;
import org.apache.commons.lang.math.IntRange;

import java.io.File;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Utility class for reading intkey datasets
 * 
 * @author ChrisF
 * 
 */
public final class IntkeyDatasetFileReader {

    /**
     * The default word to use for "or" in natural language descriptions. This
     * is used if no such word is supplied in the dataset.
     * <p>
     * Declared {@code final}: it is a fixed fallback value and must never be
     * reassigned at runtime.
     */
    private static final String DEFAULT_OR_WORD = "or";

    /**
     * Read an intkey dataset.
     * <p>
     * Both files are opened read-only. The characters file is always closed
     * before this method returns, whether it succeeds or fails. On success the
     * items file is deliberately left open and handed to the returned dataset
     * so that attribute data can be read on demand later. If anything goes
     * wrong while loading, the items file is closed as well before the
     * exception propagates, so a failed load does not leak file handles.
     * 
     * @param charactersFile
     *            The intkey characters file. Usually named ichars.
     * @param itemsFile
     *            The intkey items (taxa) file. Usually named iitems.
     * @return An object representation of the intkey dataset
     * @throws RuntimeException
     *             if the two files declare different numbers of characters, if
     *             the items file declares an unexpected record length, or if
     *             the dataset version check fails
     */
    public static IntkeyDataset readDataSet(File charactersFile, File itemsFile) {

        BinFile charBinFile = null;
        BinFile itemBinFile = null;

        // Set only once the dataset has been fully populated; decides whether
        // the items file must be released in the finally block.
        boolean loaded = false;

        try {
            // TODO should modify BinFile so that you can pass in a File.
            charBinFile = new BinFile(charactersFile.getAbsolutePath(), BinFileMode.FM_READONLY);
            itemBinFile = new BinFile(itemsFile.getAbsolutePath(), BinFileMode.FM_READONLY);

            IntkeyDataset ds = new IntkeyDataset();
            CharactersFileHeader charFileHeader = new CharactersFileHeader();
            ItemsFileHeader itemFileHeader = new ItemsFileHeader();
            List<au.org.ala.delta.model.Character> characters = new ArrayList<au.org.ala.delta.model.Character>();
            List<Item> taxa = new ArrayList<Item>();

            readCharactersFileHeader(charBinFile, charFileHeader);
            readItemsFileHeader(itemBinFile, itemFileHeader);

            // Check number of characters is same in two files
            if (charFileHeader.getNC() != itemFileHeader.getNChar()) {
                throw new RuntimeException("Characters and taxa files do not match");
            }

            // Check stated record length in items file is correct
            if (itemFileHeader.getLRec() != Constants.RECORD_LENGTH_INTEGERS) {
                throw new RuntimeException("Record length incorrect");
            }

            // Check file is correct version

            // Not sure why rpOmitOr is being checked here. Original syntax was
            // "fparam[last_used+1] != 0", where
            // last_used was set to 26. fparam was the array holding all of the
            // integers in the record. CPF 4/4/2011.
            if (itemFileHeader.getMajorVer() != Constants.DATASET_MAJOR_VERSION
                    || (itemFileHeader.getMinorVer() != Constants.DATASET_MINOR_VERSION
                            && itemFileHeader.getRpOmitOr() != 0)) {
                throw new RuntimeException("Incorrect file version");
            }

            ds.setChineseFormat(itemFileHeader.getChineseFmt() != 0);

            readHeadingsAndValidationString(charFileHeader, charBinFile, itemBinFile, ds);

            readTaxonData(itemFileHeader, itemBinFile, taxa);
            readCharacters(charFileHeader, itemFileHeader, charBinFile, itemBinFile, characters, ds);

            readCharacterImages(charFileHeader, charBinFile, itemFileHeader, itemBinFile, characters);
            readStartupImages(charFileHeader, charBinFile, ds);
            readCharacterKeywordImages(charFileHeader, charBinFile, ds);
            readTaxonKeywordImages(charFileHeader, charBinFile, ds);
            readOrWord(charFileHeader, charBinFile, ds);
            readOverlayFonts(charFileHeader, charBinFile, ds);
            readCharacterItemSubheadings(charFileHeader, charBinFile, characters, ds);
            readRealCharacterKeyStateBoundaries(itemFileHeader, itemBinFile, characters);
            readTaxonImages(itemFileHeader, itemBinFile, taxa);

            ds.setCharactersFile(charactersFile);
            ds.setItemsFile(itemsFile);
            ds.setCharactersFileHeader(charFileHeader);
            ds.setItemsFileHeader(itemFileHeader);
            ds.setCharacters(characters);
            ds.setTaxa(taxa);

            // Dataset needs a reference to the open items file so that
            // attribute data can be read on demand later.
            ds.setItemsBinFile(itemBinFile);

            loaded = true;
            return ds;
        } finally {
            try {
                // The items file is only handed over to the dataset on
                // success; on any failure nobody else will close it.
                if (!loaded && itemBinFile != null) {
                    itemBinFile.close();
                }
            } finally {
                // The characters file is never needed after loading.
                if (charBinFile != null) {
                    charBinFile.close();
                }
            }
        }
    }

    /**
     * Read header information from the characters file.
     * <p>
     * The header is the first record of the file and is consumed as a sequence
     * of integers; the trailing number on each line below is the integer slot
     * being read. Slots 1 and 15 are read and discarded. The final value
     * (Cptr) is taken from the last integer slot of the record.
     * 
     * @param charBinFile
     *            The characters file
     * @param charFileHeader
     *            The object to store header information in
     */
    private static void readCharactersFileHeader(BinFile charBinFile, CharactersFileHeader charFileHeader) {
        // read first record which contains header file information;
        ByteBuffer headerBytes = readRecord(charBinFile, 1);

        charFileHeader.setNC(headerBytes.getInt()); // 0

        headerBytes.getInt(); // 1 - maxDes - not used.

        charFileHeader.setRpCdes(headerBytes.getInt()); // 2
        charFileHeader.setRpStat(headerBytes.getInt()); // 3
        charFileHeader.setRpChlp(headerBytes.getInt()); // 4
        charFileHeader.setRpChlpGrp(headerBytes.getInt()); // 5
        charFileHeader.setRpChlpFmt1(headerBytes.getInt()); // 6
        charFileHeader.setRpChlpFmt2(headerBytes.getInt()); // 7
        charFileHeader.setRpCImagesC(headerBytes.getInt()); // 8
        charFileHeader.setRpStartupImages(headerBytes.getInt()); // 9
        charFileHeader.setRpCKeyImages(headerBytes.getInt()); // 10
        charFileHeader.setRpTKeyImages(headerBytes.getInt()); // 11
        charFileHeader.setRpHeading(headerBytes.getInt()); // 12

        // 13 - record pointer to registration subheading
        charFileHeader.setRpRegSubHeading(headerBytes.getInt());

        // 14 - record pointer to validation string for registered dataset
        charFileHeader.setRpValidationString(headerBytes.getInt());

        headerBytes.getInt(); // 15 - record number for character mask - not
                              // used.

        charFileHeader.setRpOrWord(headerBytes.getInt()); // 16
        charFileHeader.setRpCheckForCd(headerBytes.getInt()); // 17
        charFileHeader.setRpFont(headerBytes.getInt()); // 18
        charFileHeader.setRpItemSubHead(headerBytes.getInt()); // 19

        // Jump to the last integer slot of the record. ByteBuffer.position()
        // takes a BYTE offset, so the slot index has to be scaled by the size
        // of an int; passing the bare slot index would read four bytes that
        // straddle two unrelated integers.
        // NOTE(review): assumes readRecord returns a buffer whose position 0
        // is the start of the record - confirm against readRecord.
        headerBytes.position((Constants.RECORD_LENGTH_INTEGERS - 1) * Constants.SIZE_INT_IN_BYTES);

        charFileHeader.setCptr(headerBytes.getInt());
    }

    /**
     * Read header information from the items (taxa) file.
     * <p>
     * The first record of the file is consumed as a sequence of integers; the
     * number in brackets in each comment below is the integer slot being read.
     * Slots 3, 18 and 19 are read and discarded. If the first record points to
     * a second parameter record, the remaining header values are read from
     * that record; otherwise they are all set to zero.
     * 
     * @param itemBinFile
     *            The items (taxa) file
     * @param itemFileHeader
     *            The object to store header information in
     */
    private static void readItemsFileHeader(BinFile itemBinFile, ItemsFileHeader itemFileHeader) {

        ByteBuffer primary = readRecord(itemBinFile, 1);

        // (0) number of items
        itemFileHeader.setNItem(primary.getInt());
        // (1) number of characters
        itemFileHeader.setNChar(primary.getInt());
        // (2) maximum number of states
        itemFileHeader.setMs(primary.getInt());

        // (3) MaxDat - not used
        primary.getInt();

        // (4) record length used in items file
        itemFileHeader.setLRec(primary.getInt());
        // (5) record pointer to taxon names
        itemFileHeader.setRpTnam(primary.getInt());
        // (6) record pointer to specifications
        itemFileHeader.setRpSpec(primary.getInt());
        // (7) record pointer to minima of integer characters
        itemFileHeader.setRpMini(primary.getInt());
        // (8) length of dependency array
        itemFileHeader.setLDep(primary.getInt());
        // (9) record pointer to character dependency array
        itemFileHeader.setRpCdep(primary.getInt());
        // (10) length of inverted dependency array
        itemFileHeader.setLinvdep(primary.getInt());
        // (11) record pointer to inverted dependency array
        itemFileHeader.setRpInvdep(primary.getInt());
        // (12) record pointer to data for each character
        itemFileHeader.setRpCdat(primary.getInt());
        // (13) length of state bounds array
        itemFileHeader.setLSbnd(primary.getInt());

        // (14) length of key states array - never stored as less than 1
        int keyStatesLength = primary.getInt();
        itemFileHeader.setLkstat(keyStatesLength < 1 ? 1 : keyStatesLength);

        // (15) major version
        itemFileHeader.setMajorVer(primary.getInt());
        // (16) record pointer to key state bounds array
        itemFileHeader.setRpNkbd(primary.getInt());
        // (17) maximum integer value
        itemFileHeader.setMaxInt(primary.getInt());

        // (18) Maxtxt1 - not used
        primary.getInt();
        // (19) Maxtxt2 - not used
        primary.getInt();

        // (20) minor version
        itemFileHeader.setMinorVer(primary.getInt());
        // (21) character specifying taxon images
        itemFileHeader.setTaxonImageChar(primary.getInt());
        // (22) pointer to character images
        itemFileHeader.setRpCimagesI(primary.getInt());
        // (23) pointer to taxon images
        itemFileHeader.setRpTimages(primary.getInt());
        // (24) whether to allow DELTA output via OUTPUT SUMMARY command
        itemFileHeader.setEnableDeltaOutput(primary.getInt());
        // (25) whether chinese character set
        itemFileHeader.setChineseFmt(primary.getInt());
        // (26) record pointer to characters for synonomy
        itemFileHeader.setRpCsynon(primary.getInt());
        // (27) record pointer to "omit or" list of characters
        itemFileHeader.setRpOmitOr(primary.getInt());
        // (28) pointer to second parameter record
        itemFileHeader.setRpNext(primary.getInt());
        // (29: Constants.LREC - 3) pointer to duplicated item name mask
        itemFileHeader.setDupItemPtr(primary.getInt());
        // (30: Constants.LREC - 2) pointer to b-tree and image masks appended
        // to items file
        itemFileHeader.setTptr(primary.getInt());
        // (31: Constants.LREC - 1) length of btree in bytes
        itemFileHeader.setLbtree(primary.getInt());

        int secondRecordNumber = itemFileHeader.getRpNext();
        if (secondRecordNumber <= 0) {
            // No second parameter record: everything it would have supplied
            // is zeroed.
            itemFileHeader.setRpUseCc(0);
            itemFileHeader.setRpTlinks(new int[] { 0, 0 });
            itemFileHeader.setRpOmitPeriod(0);
            itemFileHeader.setRpNewPara(0);
            itemFileHeader.setRpNonAutoCc(0);
            return;
        }

        ByteBuffer secondary = readRecord(itemBinFile, secondRecordNumber);

        // The two taxon link pointers are not adjacent in the record (slots 1
        // and 5), so they are held in locals until both have been read.
        itemFileHeader.setRpUseCc(secondary.getInt());
        int firstTaxonLinks = secondary.getInt();
        itemFileHeader.setRpOmitPeriod(secondary.getInt());
        itemFileHeader.setRpNewPara(secondary.getInt());
        itemFileHeader.setRpNonAutoCc(secondary.getInt());
        int secondTaxonLinks = secondary.getInt();

        itemFileHeader.setRpTlinks(new int[] { firstTaxonLinks, secondTaxonLinks });
    }

    /**
     * Read the dataset heading and, where present, the registered dataset
     * subheading and validation string.
     * <p>
     * The heading comes from the characters file when the characters file
     * header holds a heading pointer greater than zero; otherwise it is read
     * from record 2 of the items file. The subheading and validation string
     * are read from whichever of the two files supplied the heading, using the
     * pointers held in the characters file header, and only when those
     * pointers are greater than zero.
     * 
     * @param charFileHeader
     *            Characters file header
     * @param charBinFile
     *            Characters file
     * @param itemBinFile
     *            Items (taxa) file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the read information
     */
    private static void readHeadingsAndValidationString(CharactersFileHeader charFileHeader, BinFile charBinFile,
            BinFile itemBinFile, IntkeyDataset ds) {
        boolean headingInCharsFile = charFileHeader.getRpHeading() > 0;

        // Pick the file and record that hold the heading.
        BinFile headingFile = headingInCharsFile ? charBinFile : itemBinFile;
        int headingRecord = headingInCharsFile ? charFileHeader.getRpHeading() : 2;

        ds.setHeading(readReferencedString(headingFile, headingRecord));

        // Registered dataset subheading (optional)
        int subHeadingRecord = charFileHeader.getRpRegSubHeading();
        if (subHeadingRecord > 0) {
            ds.setSubHeading(readReferencedString(headingFile, subHeadingRecord));
        }

        // Validation string for registered dataset (optional)
        int validationRecord = charFileHeader.getRpValidationString();
        if (validationRecord > 0) {
            ds.setValidationString(readReferencedString(headingFile, validationRecord));
        }
    }

    /**
     * Read character data.
     * <p>
     * Creates one character object per dataset character from the type codes
     * in the items file, then fills in reliabilities, descriptions and states,
     * notes, minimums/maximums, dependencies and the optional per-character
     * flags (synonymy, omit or, use controlling characters first, omit period,
     * new paragraph, non automatic controlling characters).
     * 
     * @param charFileHeader
     *            Characters file header
     * @param itemFileHeader
     *            Items (taxa) file header
     * @param charBinFile
     *            Characters file
     * @param itemBinFile
     *            Items (taxa) file
     * @param characters
     *            List to populate with object representations of the dataset
     *            characters, in character number order. This method does not
     *            set the list on the dataset; the calling method must do so.
     * @param ds
     *            Object representation of the intkey dataset. This object will
     *            be updated with some of the read information (the delta
     *            output setting, character notes formatting and the list of
     *            synonymy characters).
     * @throws RuntimeException
     *             if a character type code is not recognised, if the number of
     *             states of a multistate character differs between the two
     *             files, or if a non-text character is flagged as containing
     *             synonymy information
     */
    private static void readCharacters(CharactersFileHeader charFileHeader, ItemsFileHeader itemFileHeader,
            BinFile charBinFile, BinFile itemBinFile, List<Character> characters, IntkeyDataset ds) {

        int numChars = charFileHeader.getNC();

        // READ NUMBER OF CHARACTER STATES
        seekToRecord(charBinFile, charFileHeader.getRpStat());
        List<Integer> numCharacterStates = readIntegerList(charBinFile, numChars);

        // READ CHARACTER TYPES
        seekToRecord(itemBinFile, itemFileHeader.getRpSpec());
        List<Integer> charTypesList = readIntegerList(itemBinFile, numChars);

        // Sum of the raw (signed) type codes. Used to determine whether or
        // not output to delta format is permitted - see below.
        int charTypeSum = 0;

        for (int i = 0; i < numChars; i++) {
            int typeCode = charTypesList.get(i);
            charTypeSum += typeCode;
            characters.add(newCharacterForTypeCode(typeCode, i + 1));
        }

        // A checksum is supplied in the items file. If this checksum matches
        // the sum of the integers used to specify the character types, delta
        // output is enabled. Otherwise delta output is disabled.
        readEnableDeltaOutput(charTypeSum, itemFileHeader, ds);

        // The character types and the numbers of states are each a list of
        // numChars integers, so each occupies the same number of records.
        int recordsPerIntegerList = recordsSpannedByBytes(numChars * Constants.SIZE_INT_IN_BYTES);

        // read numbers of states from items file and check for compatability
        // (only compare multistates because if ICHARS and IITEMS are generated
        // separately, numerics characters with units will differ)
        seekToRecord(itemBinFile, itemFileHeader.getRpSpec() + recordsPerIntegerList);
        List<Integer> itemsFileNumCharacterStates = readIntegerList(itemBinFile, numChars);

        for (int i = 0; i < numChars; i++) {
            Character ch = characters.get(i);
            if (ch instanceof MultiStateCharacter) {
                int numStatesFromCharsFile = numCharacterStates.get(i);
                int numStatesFromItemsFile = itemsFileNumCharacterStates.get(i);
                if (numStatesFromItemsFile != numStatesFromCharsFile) {
                    throw new RuntimeException(
                            "Numbers of states for characters differ between characters file and items file");
                }
            }
        }

        // READ CHARACTER RELIABILITIES
        // These follow the two integer lists read above.
        seekToRecord(itemBinFile, itemFileHeader.getRpSpec() + 2 * recordsPerIntegerList);

        List<Float> reliabilityList = readFloatList(itemBinFile, numChars);
        for (int i = 0; i < numChars; i++) {
            float reliability = reliabilityList.get(i);
            characters.get(i).setReliability(reliability);
        }

        readCharacterDescriptionsAndStates(charFileHeader, charBinFile, characters, numCharacterStates);
        readCharacterNotes(charFileHeader, charBinFile, characters, ds);
        readCharacterMinimumsAndMaximums(itemFileHeader, itemBinFile, characters);
        readCharacterDependencies(itemFileHeader, itemBinFile, characters);

        // Optional per-character flag lists. Each is null when the header
        // holds no pointer for it, in which case the corresponding property
        // is left untouched on every character.
        List<Integer> synonmyInfoList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpCsynon(),
                numChars);
        List<Integer> omitOrList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpOmitOr(), numChars);
        List<Integer> useCcList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpUseCc(), numChars);
        List<Integer> omitPeriodList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpOmitPeriod(),
                numChars);
        List<Integer> newParagraphList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpNewPara(),
                numChars);
        List<Integer> nonAutoCcList = readOptionalCharacterFlags(itemBinFile, itemFileHeader.getRpNonAutoCc(),
                numChars);

        List<TextCharacter> synonymyCharacters = new ArrayList<TextCharacter>();

        for (int i = 0; i < numChars; i++) {
            Character ch = characters.get(i);

            if (synonmyInfoList != null) {
                ch.setContainsSynonmyInformation(synonmyInfoList.get(i) != 0);
                if (ch.getContainsSynonmyInformation()) {
                    if (ch instanceof TextCharacter) {
                        synonymyCharacters.add((TextCharacter) ch);
                    } else {
                        throw new RuntimeException("Only text characters can contains synonymy information");
                    }
                }
            }

            if (omitOrList != null) {
                ch.setOmitOr(omitOrList.get(i) != 0);
            }

            if (useCcList != null) {
                ch.setUseCc(useCcList.get(i) != 0);
            }

            if (omitPeriodList != null) {
                ch.setOmitPeriod(omitPeriodList.get(i) != 0);
            }

            if (newParagraphList != null) {
                ch.setNewParagraph(newParagraphList.get(i) != 0);
            }

            if (nonAutoCcList != null) {
                ch.setNonAutoCc(nonAutoCcList.get(i) != 0);
            }
        }

        ds.setSynonymyCharacters(synonymyCharacters);

    }

    /**
     * Create the character object for a type code read from the items file.
     * 
     * @param typeCode
     *            The raw type code. Its absolute value selects the character
     *            type (1 unordered multistate, 2 ordered multistate, 3 integer
     *            numeric, 4 real numeric, 5 text). A value of exactly -4 marks
     *            an integer character that is represented as a real.
     * @param characterNumber
     *            The one-based character number
     * @return the newly created character
     * @throws RuntimeException
     *             if the type code is not recognised
     */
    private static au.org.ala.delta.model.Character newCharacterForTypeCode(int typeCode, int characterNumber) {
        CharacterType type;

        switch (Math.abs(typeCode)) {
        case 1:
            type = CharacterType.UnorderedMultiState;
            break;
        case 2:
            type = CharacterType.OrderedMultiState;
            break;
        case 3:
            type = CharacterType.IntegerNumeric;
            break;
        case 4:
            type = CharacterType.RealNumeric;
            break;
        case 5:
            type = CharacterType.Text;
            break;
        default:
            throw new RuntimeException("Unrecognized character type");
        }

        CharacterData impl = new DefaultCharacterData(characterNumber);
        au.org.ala.delta.model.Character newChar = CharacterFactory.newCharacter(type, impl);

        // A char type of -4 indicates that the character is an integer
        // represented as a real.
        if (typeCode == -4) {
            ((RealCharacter) newChar).setIntegerRepresentedAsReal(true);
        }

        return newChar;
    }

    /**
     * Read an optional list holding one integer per character from the items
     * file.
     * 
     * @param itemBinFile
     *            Items (taxa) file
     * @param recordPointer
     *            Record at which the list starts, or zero if the dataset does
     *            not supply the list
     * @param numChars
     *            Number of characters in the dataset
     * @return the values read, or {@code null} if {@code recordPointer} is
     *         zero
     */
    private static List<Integer> readOptionalCharacterFlags(BinFile itemBinFile, int recordPointer, int numChars) {
        if (recordPointer == 0) {
            return null;
        }
        seekToRecord(itemBinFile, recordPointer);
        return readIntegerList(itemBinFile, numChars);
    }

    /**
     * Read the description of every character together with the text of its
     * states.
     * <p>
     * For each character the file holds a table of text lengths followed by
     * the text itself. The first entry is the character description; the
     * remaining entries are state texts. For integer and real characters a
     * single remaining entry is stored as the units; for multistate characters
     * the remaining entries become the states; any other character must have
     * no remaining entries.
     * 
     * @param charFileHeader
     *            Characters file header data
     * @param charBinFile
     *            Characters file
     * @param characters
     *            List of object representations of dataset characters, ordered
     *            by character number. These objects will be updated with the
     *            read information.
     * @param numCharacterStates
     *            Number of states for each dataset character, ordered by
     *            character number.
     * @throws RuntimeException
     *             if a numeric character lists more than one state, or a
     *             character that is neither numeric nor multistate lists any
     */
    private static void readCharacterDescriptionsAndStates(CharactersFileHeader charFileHeader, BinFile charBinFile,
            List<Character> characters, List<Integer> numCharacterStates) {
        final int characterCount = charFileHeader.getNC();

        // Per character: the record at which its description data starts.
        seekToRecord(charBinFile, charFileHeader.getRpCdes());
        final List<Integer> descriptionRecords = readIntegerList(charBinFile, characterCount);

        for (int charIdx = 0; charIdx < characterCount; charIdx++) {
            final au.org.ala.delta.model.Character character = characters.get(charIdx);

            final int firstRecord = descriptionRecords.get(charIdx);
            seekToRecord(charBinFile, firstRecord);

            // One length for the character description plus one per state.
            final int entryCount = numCharacterStates.get(charIdx) + 1;
            final List<Integer> entryLengths = readIntegerList(charBinFile, entryCount);

            int totalTextBytes = 0;
            for (int idx = 0; idx < entryLengths.size(); idx++) {
                totalTextBytes += entryLengths.get(idx);
            }

            // The text starts in the record following the length table.
            final int lengthTableRecords = recordsSpannedByBytes(entryCount * Constants.SIZE_INT_IN_BYTES);
            seekToRecord(charBinFile, firstRecord + lengthTableRecords);
            final ByteBuffer textBytes = charBinFile.readByteBuffer(totalTextBytes);

            final List<String> stateTexts = new ArrayList<String>();
            boolean descriptionPending = true;
            for (int entryLength : entryLengths) {
                final byte[] rawText = new byte[entryLength];
                textBytes.get(rawText);
                final String text = BinFileEncoding.decode(rawText);

                if (descriptionPending) {
                    // First entry listed is the character description
                    character.setDescription(text);
                    descriptionPending = false;
                } else {
                    stateTexts.add(text);
                }
            }

            final int stateTextCount = stateTexts.size();

            if (character instanceof IntegerCharacter) {
                if (stateTextCount > 1) {
                    throw new RuntimeException(
                            "Integer characters should only have one state listed which represents the units description.");
                }
                if (stateTextCount == 1) {
                    ((IntegerCharacter) character).setUnits(stateTexts.get(0));
                }
            } else if (character instanceof RealCharacter) {
                if (stateTextCount > 1) {
                    throw new RuntimeException(
                            "Real numeric characters should only have one state listed which represents the units description.");
                }
                if (stateTextCount == 1) {
                    ((RealCharacter) character).setUnits(stateTexts.get(0));
                }
            } else if (character instanceof MultiStateCharacter) {
                final MultiStateCharacter multiState = (MultiStateCharacter) character;
                multiState.setNumberOfStates(stateTextCount);

                // State numbers are one-based.
                int stateNumber = 1;
                for (String stateText : stateTexts) {
                    multiState.setState(stateNumber, stateText);
                    stateNumber++;
                }
            } else if (stateTextCount > 0) {
                throw new RuntimeException("Text characters should not have a state specified");
            }
        }
    }

    /**
     * Read notes for each dataset character
     * 
     * @param charFileHeader
     *            Characters file header
     * @param charBinFile
     *            Characters file
     * @param characters
     *            List of object representations of dataset characters, ordered
     *            by character number. These objects will be updated with the
     *            read information.
     * @param ds
     *            Object representation of the intkey dataset. This object will
     *            be updated with the character notes formatting information, if
     *            such data is supplied with the dataset.
     */
    private static void readCharacterNotes(CharactersFileHeader charFileHeader, BinFile charBinFile,
            List<Character> characters, IntkeyDataset ds) {
        final int characterCount = charFileHeader.getNC();

        // Notes text: one string is read per character and assigned to the
        // character at the same list position.
        final int notesRecord = charFileHeader.getRpChlp();
        if (notesRecord > 0) {
            List<String> notesByCharacter = readStringList(charBinFile, notesRecord, characterCount);
            for (int idx = 0; idx < characterCount; idx++) {
                characters.get(idx).setNotes(notesByCharacter.get(idx));
            }
        }

        // Formatting information used when the notes are output to the main
        // intkey window.
        final int mainFormatRecord = charFileHeader.getRpChlpFmt1();
        if (mainFormatRecord > 0) {
            ds.setMainCharNotesFormattingInfo(readReferencedString(charBinFile, mainFormatRecord));
        }

        // Formatting information used when the notes are output to the help
        // window.
        final int helpFormatRecord = charFileHeader.getRpChlpFmt2();
        if (helpFormatRecord > 0) {
            ds.setHelpCharNotesFormattingInfo(readReferencedString(charBinFile, helpFormatRecord));
        }
    }

    /**
     * Read maximum and minimum values for integer characters
     * 
     * @param itemFileHeader
     *            Items file header data
     * @param itemBinFile
     *            Items file
     * 
     * @param characters
     *            List of object representations of dataset characters, ordered
     *            by character number. These objects will be updated with the
     *            read information.
     */
    private static void readCharacterMinimumsAndMaximums(ItemsFileHeader itemFileHeader, BinFile itemBinFile,
            List<Character> characters) {
        final int characterCount = itemFileHeader.getNChar();
        final int minimumsRecord = itemFileHeader.getRpMini();

        // A record pointer of zero means the dataset carries no min/max data.
        if (minimumsRecord == 0) {
            return;
        }

        seekToRecord(itemBinFile, minimumsRecord);
        List<Integer> minima = readIntegerList(itemBinFile, characterCount);

        // The maximums are read from the record that follows the records
        // occupied by the minimums.
        int recordsUsedByMinima = recordsSpannedByBytes(characterCount * Constants.SIZE_INT_IN_BYTES);
        seekToRecord(itemBinFile, minimumsRecord + recordsUsedByMinima);
        List<Integer> maxima = readIntegerList(itemBinFile, characterCount);

        // Both lists hold a slot for every character, but only integer
        // characters are updated.
        for (int idx = 0; idx < characterCount; idx++) {
            Character candidate = characters.get(idx);
            if (!(candidate instanceof IntegerCharacter)) {
                continue;
            }

            int lowest = minima.get(idx);
            int highest = maxima.get(idx);

            IntegerCharacter integerChar = (IntegerCharacter) candidate;
            integerChar.setMinimumValue(lowest);
            integerChar.setMaximumValue(highest);
        }
    }

    /**
     * Read character dependencies
     * 
     * @param itemFileHeader
     *            Items file header data
     * @param itemBinFile
     *            Items file
     * @param characters
     *            List of object representations of dataset characters, ordered
     *            by character number. These objects will be updated with the
     *            read information.
     */
    private static void readCharacterDependencies(ItemsFileHeader itemFileHeader, BinFile itemBinFile,
            List<Character> characters) {
        DeltaDataSetFactory dependencyFactory = new DefaultDataSetFactory();
        int characterCount = itemFileHeader.getNChar();

        // Dependency data is LDep integers starting at record rpCdep. It is
        // only read when it is long enough to hold the per-character index
        // and the record pointer is set.
        if (itemFileHeader.getLDep() < characterCount || itemFileHeader.getRpCdep() <= 0) {
            return;
        }

        seekToRecord(itemBinFile, itemFileHeader.getRpCdep());
        List<Integer> depData = readIntegerList(itemBinFile, itemFileHeader.getLDep());

        // The first integers of the data form an index with one entry per
        // character: zero (or less) means "no dependent characters", any
        // other value is a 1-based offset to that character's state table.
        for (int charIdx = 0; charIdx < characterCount; charIdx++) {
            int charOffset = depData.get(charIdx);
            if (charOffset <= 0) {
                continue;
            }

            au.org.ala.delta.model.Character candidate = characters.get(charIdx);
            if (!(candidate instanceof MultiStateCharacter)) {
                throw new RuntimeException("Only multistate characters can be controlling characters");
            }
            MultiStateCharacter controller = (MultiStateCharacter) candidate;
            int stateCount = controller.getStates().length;

            // State table: one integer per state of the controlling
            // character. A non-zero value is a 1-based offset to the range
            // data for that state.
            int firstStateSlot = charOffset - 1;
            int lastStateSlotExclusive = charOffset - 1 + stateCount;
            List<Integer> stateOffsets = depData.subList(firstStateSlot, lastStateSlotExclusive);

            // Group the states by the exact set of characters they make
            // inapplicable, so that a single CharacterDependency is created
            // per distinct set of dependent characters.
            Map<Set<Integer>, Set<Integer>> statesByDependents = new HashMap<Set<Integer>, Set<Integer>>();

            for (int stateIdx = 0; stateIdx < stateCount; stateIdx++) {
                int stateOffset = stateOffsets.get(stateIdx);
                if (stateOffset <= 0) {
                    continue;
                }

                // Range data layout: the number of ranges, followed by a
                // lower and an upper bound for each range.
                int rangeCount = depData.get(stateOffset - 1);
                List<Integer> bounds = depData.subList(stateOffset, stateOffset + (rangeCount * 2));

                Set<Integer> dependents = new HashSet<Integer>();
                for (int b = 0; b < rangeCount * 2; b += 2) {
                    int low = bounds.get(b);
                    int high = bounds.get(b + 1);

                    IntRange range = new IntRange(low, high);
                    for (int dependentNumber : range.toArray()) {
                        dependents.add(dependentNumber);
                    }
                }

                // State ids are 1-based.
                Set<Integer> statesForDependents = statesByDependents.get(dependents);
                if (statesForDependents == null) {
                    statesForDependents = new HashSet<Integer>();
                    statesByDependents.put(dependents, statesForDependents);
                }
                statesForDependents.add(stateIdx + 1);
            }

            // Create one dependency per group and register it on the
            // controlling character and on every character it controls.
            for (Map.Entry<Set<Integer>, Set<Integer>> group : statesByDependents.entrySet()) {
                Set<Integer> controlledNumbers = group.getKey();
                CharacterDependency dependency = dependencyFactory.createCharacterDependency(controller,
                        group.getValue(), controlledNumbers);
                candidate.addDependentCharacters(dependency);

                for (int controlledNumber : controlledNumbers) {
                    // Character numbers in the file are 1-based; the list is
                    // 0-based.
                    Character controlled = characters.get(controlledNumber - 1);
                    controlled.addControllingCharacter(dependency);
                }
            }
        }
    }

    /**
     * Read taxon data
     * 
     * @param itemFileHeader
     *            Items (taxa) file header data
     * @param itemBinFile
     *            Items (taxa) file
     * @param taxa
     *            List of object representation of taxa, ordered by taxon
     *            number. The calling method must set this data on the object
     *            representation of the intkey dataset.
     */
    private static void readTaxonData(ItemsFileHeader itemFileHeader, BinFile itemBinFile, List<Item> taxa) {
        final int taxonCount = itemFileHeader.getNItem();

        // Create an (as yet unnamed) Item for every taxon; taxon numbers are
        // 1-based.
        for (int taxonNumber = 1; taxonNumber <= taxonCount; taxonNumber++) {
            ItemData data = new IntkeyItemData(taxonNumber);
            taxa.add(new Item(data));
        }

        // Taxon names (rpTnam): taxonCount + 1 offsets, followed on the next
        // record boundary by the name bytes themselves.
        final int namesRecord = itemFileHeader.getRpTnam();
        seekToRecord(itemBinFile, namesRecord);
        List<Integer> nameOffsets = readIntegerList(itemBinFile, taxonCount + 1);

        int recordsUsedByOffsets = recordsSpannedByBytes(nameOffsets.size() * Constants.SIZE_INT_IN_BYTES);
        seekToRecord(itemBinFile, namesRecord + recordsUsedByOffsets);

        // The last offset is used as the number of name bytes to read.
        ByteBuffer names = itemBinFile.readByteBuffer(nameOffsets.get(nameOffsets.size() - 1));
        names.position(0);

        // Names are consumed sequentially from the buffer; each name's
        // length is the difference between consecutive offsets.
        for (int idx = 0; idx < taxonCount; idx++) {
            int from = nameOffsets.get(idx);
            int to = nameOffsets.get(idx + 1);
            byte[] rawName = new byte[to - from];
            names.get(rawName);
            taxa.get(idx).setDescription(BinFileEncoding.decode(rawName));
        }

        readTaxonLinksFiles(itemFileHeader, itemBinFile, taxa);
    }

    /**
     * Read character images
     * 
     * @param charFileHeader
     *            The characters file header
     * @param charBinFile
     *            The characters file
     * @param itemFileHeader
     *            The items (taxa) file header data
     * @param itemBinFile
     *            The items (taxa) file
     * @param characters
     *            The list of object representations of characters in character
     *            number order. These objects will be updated with the read
     *            image information.
     */
    private static void readCharacterImages(CharactersFileHeader charFileHeader, BinFile charBinFile,
            ItemsFileHeader itemFileHeader, BinFile itemBinFile, List<Character> characters) {
        final int characterCount = charFileHeader.getNC();

        // Character image info has been shifted from the items file to the
        // characters file. To maintain compatibility with older datasets,
        // the characters file is tried first and the items file is used as
        // a fallback.
        int imagesRecord = charFileHeader.getRpCImagesC();
        BinFile imagesSource = charBinFile;
        if (imagesRecord == 0) {
            imagesRecord = itemFileHeader.getRpCimagesI();
            imagesSource = itemBinFile;
        }

        // Neither file points at any character image data.
        if (imagesRecord == 0) {
            return;
        }

        List<String> imageDataByCharacter = readStringList(imagesSource, imagesRecord, characterCount);

        for (int idx = 0; idx < characterCount; idx++) {
            Character target = characters.get(idx);
            String packedImages = imageDataByCharacter.get(idx);

            // A null entry means the character has no images.
            if (packedImages == null) {
                continue;
            }

            for (Pair<String, String> entry : parseFileData(packedImages)) {
                target.addImage(createImage(entry.getFirst(), entry.getSecond(), ImageType.IMAGE_CHARACTER));
            }
        }
    }

    /**
     * Read taxon images
     * 
     * @param itemFileHeader
     *            The items (taxa) file header data
     * @param itemBinFile
     *            The items (taxa) File
     * @param taxa
     *            The list of object representations of taxa in taxon number
     *            order. These objects will be updated with the read image
     *            information.
     */
    private static void readTaxonImages(ItemsFileHeader itemFileHeader, BinFile itemBinFile, List<Item> taxa) {
        final int taxonCount = itemFileHeader.getNItem();
        final int imagesRecord = itemFileHeader.getRpTimages();

        // A record pointer of zero means no taxon image data is supplied.
        if (imagesRecord == 0) {
            return;
        }

        List<String> imageDataByTaxon = readStringList(itemBinFile, imagesRecord, taxonCount);

        for (int idx = 0; idx < taxonCount; idx++) {
            Item target = taxa.get(idx);
            String packedImages = imageDataByTaxon.get(idx);

            // A null entry means the taxon has no images.
            if (packedImages == null) {
                continue;
            }

            for (Pair<String, String> entry : parseFileData(packedImages)) {
                target.addImage(createImage(entry.getFirst(), entry.getSecond(), ImageType.IMAGE_TAXON));
            }
        }
    }

    /**
     * Read dataset startup images
     * 
     * @param charFileHeader
     *            The characters file header data
     * @param charBinFile
     *            The characters file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the read image information
     */
    private static void readStartupImages(CharactersFileHeader charFileHeader, BinFile charBinFile,
            IntkeyDataset ds) {
        // Nothing to do unless the header points at startup image data.
        if (charFileHeader.getRpStartupImages() <= 0) {
            return;
        }

        // The pointed-to record starts with an integer that is passed on as
        // the record number of the image data string.
        seekToRecord(charBinFile, charFileHeader.getRpStartupImages());
        int dataRecord = charBinFile.readInt();
        String packedImages = readReferencedString(charBinFile, dataRecord);

        // The dataset is left untouched when the string is empty.
        if (StringUtils.isEmpty(packedImages)) {
            return;
        }

        List<Image> images = new ArrayList<Image>();
        for (Pair<String, String> entry : parseFileData(packedImages)) {
            images.add(createImage(entry.getFirst(), entry.getSecond(), ImageType.IMAGE_STARTUP));
        }
        ds.setStartupImages(images);
    }

    /**
     * Read character keyword images
     * 
     * @param charFileHeader
     *            The characters file header data
     * @param charBinFile
     *            The characters file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the read image information
     */
    private static void readCharacterKeywordImages(CharactersFileHeader charFileHeader, BinFile charBinFile,
            IntkeyDataset ds) {
        // Nothing to do unless the header points at character keyword image
        // data.
        if (charFileHeader.getRpCKeyImages() <= 0) {
            return;
        }

        // The pointed-to record starts with an integer that is passed on as
        // the record number of the image data string.
        seekToRecord(charBinFile, charFileHeader.getRpCKeyImages());
        int dataRecord = charBinFile.readInt();
        String packedImages = readReferencedString(charBinFile, dataRecord);

        // The dataset is left untouched when the string is empty.
        if (StringUtils.isEmpty(packedImages)) {
            return;
        }

        List<Image> images = new ArrayList<Image>();
        for (Pair<String, String> entry : parseFileData(packedImages)) {
            images.add(createImage(entry.getFirst(), entry.getSecond(), ImageType.IMAGE_CHARACTER_KEYWORD));
        }
        ds.setCharacterKeywordImages(images);
    }

    /**
     * Read taxon keyword images
     * 
     * @param charFileHeader
     *            The characters file header data
     * @param charBinFile
     *            The characters file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the read image information
     */
    private static void readTaxonKeywordImages(CharactersFileHeader charFileHeader, BinFile charBinFile,
            IntkeyDataset ds) {
        // Nothing to do unless the header points at taxon keyword image data.
        if (charFileHeader.getRpTKeyImages() <= 0) {
            return;
        }

        // The pointed-to record starts with an integer that is passed on as
        // the record number of the image data string.
        seekToRecord(charBinFile, charFileHeader.getRpTKeyImages());
        int dataRecord = charBinFile.readInt();
        String packedImages = readReferencedString(charBinFile, dataRecord);

        // The dataset is left untouched when the string is empty.
        if (StringUtils.isEmpty(packedImages)) {
            return;
        }

        List<Image> images = new ArrayList<Image>();
        for (Pair<String, String> entry : parseFileData(packedImages)) {
            images.add(createImage(entry.getFirst(), entry.getSecond(), ImageType.IMAGE_TAXON_KEYWORD));
        }
        ds.setTaxonKeywordImages(images);
    }

    /**
     * Read the word to use for "or" when generating natural language
     * descriptions. If no such data is supplied in the dataset, the
     * DEFAULT_OR_WORD is used.
     * 
     * @param charFileHeader
     *            Characters file header data
     * @param charBinFile
     *            Characters file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the "or" word read from the file, or with
     *            DEFAULT_OR_WORD if the dataset does not supply one.
     */
    private static void readOrWord(CharactersFileHeader charFileHeader, BinFile charBinFile, IntkeyDataset ds) {
        final int orWordRecord = charFileHeader.getRpOrWord();

        // No record pointer: the dataset does not supply its own word.
        if (orWordRecord == 0) {
            ds.setOrWord(DEFAULT_OR_WORD);
            return;
        }

        // The first record holds the length of the word; the word itself
        // is read from the following record.
        seekToRecord(charBinFile, orWordRecord);
        int wordLength = charBinFile.readInt();
        seekToRecord(charBinFile, orWordRecord + 1);

        ds.setOrWord(readString(charBinFile, wordLength));
    }

    /**
     * Read information about the fonts to use when generating text labels on
     * image overlays
     * 
     * @param charFileHeader
     *            Characters file header data
     * @param charBinFile
     *            Characters file
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with the overlay font information read from the file.
     */
    private static void readOverlayFonts(CharactersFileHeader charFileHeader, BinFile charBinFile,
            IntkeyDataset ds) {
        final int fontsRecord = charFileHeader.getRpFont();

        // A record pointer of zero means no overlay fonts are supplied.
        if (fontsRecord == 0) {
            return;
        }

        // First record: a single integer giving the number of fonts.
        seekToRecord(charBinFile, fontsRecord);
        int fontCount = charBinFile.readInt();

        // Next record(s): the length of each font description string.
        final int lengthsRecord = fontsRecord + 1;
        seekToRecord(charBinFile, lengthsRecord);
        List<Integer> descriptionLengths = readIntegerList(charBinFile, fontCount);

        int combinedLength = 0;
        for (int length : descriptionLengths) {
            combinedLength += length;
        }

        // The font descriptions follow the records occupied by the lengths
        // and are read as one contiguous run of bytes.
        int recordsUsedByLengths = recordsSpannedByBytes(fontCount * Constants.SIZE_INT_IN_BYTES);
        seekToRecord(charBinFile, lengthsRecord + recordsUsedByLengths);

        List<FontInfo> parsedFonts = new ArrayList<FontInfo>();
        ByteBuffer descriptionBytes = charBinFile.readByteBuffer(combinedLength);

        for (int length : descriptionLengths) {
            byte[] rawDescription = new byte[length];
            descriptionBytes.get(rawDescription);
            String description = BinFileEncoding.decode(rawDescription);

            FontInfo parsed;
            try {
                parsed = parseOverlayFontString(description);
            } catch (Exception e) {
                // A workaround for corrupt font info in some of the
                // crustacea.net keys: report the problem and fall back to
                // the default font rather than failing the whole load.
                System.err.println("Error parsing font info: " + description);
                parsed = new ImageSettings().getDefaultFontInfo();
            }
            parsedFonts.add(parsed);
        }

        ds.setOverlayFonts(parsedFonts);
    }

    /**
     * Parse the string used to specify an overlay font
     * 
     * @param fontInfoStr
     *            String representation of an overlay font
     * @return Object representation of an overlay font
     */
    private static FontInfo parseOverlayFontString(String fontInfoStr) {
        String[] parts = fontInfoStr.split(" ");

        // The first six space-separated tokens are integers, in this order:
        // size, weight, italic flag, pitch, family, character set.
        int[] numericFields = new int[6];
        for (int field = 0; field < numericFields.length; field++) {
            numericFields[field] = Integer.parseInt(parts[field]);
        }

        // Everything after the numeric fields is the font name, which may
        // itself contain spaces.
        String[] nameParts = Arrays.copyOfRange(parts, numericFields.length, parts.length);
        String fontName = StringUtils.join(nameParts, ' ');

        // The italic flag is treated as set for any value greater than zero.
        boolean italic = numericFields[2] > 0;

        return new FontInfo(numericFields[0], numericFields[1], italic, numericFields[3], numericFields[4],
                numericFields[5], fontName);
    }

    /**
     * Read character item subheadings. These are used in natural language
     * descriptions
     * 
     * @param charFileHeader
     *            Characters file header data
     * @param charBinFile
     *            Characters file
     * @param characters
     *            The list of object representations of characters in character
     *            number order. These objects will be updated with character
     *            item subheading data as appropriate.
     * @param ds
     *            Object representation of intkey dataset. This object will be
     *            updated with a boolean specifying whether or not character
     *            item subheadings are present in the dataset.
     */
    private static void readCharacterItemSubheadings(CharactersFileHeader charFileHeader, BinFile charBinFile,
            List<Character> characters, IntkeyDataset ds) {
        final int characterCount = charFileHeader.getNC();
        final int subheadingsRecord = charFileHeader.getRpItemSubHead();

        // A record pointer of zero means the dataset has no item subheadings;
        // the dataset flag is left untouched in that case.
        if (subheadingsRecord == 0) {
            return;
        }

        ds.setItemSubheadingsPresent(true);

        // One subheading is read per character and assigned to the
        // character at the same list position.
        List<String> subheadingByCharacter = readStringList(charBinFile, subheadingsRecord, characterCount);
        for (int idx = 0; idx < characterCount; idx++) {
            characters.get(idx).setItemSubheading(subheadingByCharacter.get(idx));
        }
    }

    /**
     * Read key state boundaries for real-value characters. These are used to
     * convert real values for characters into multistate values for use in the
     * BEST algorithm.
     * 
     * @param itemFileHeader
     *            Items (taxa) file header data.
     * @param itemBinFile
     *            Items (taxa) file
     * @param characters
     *            The list of object representations of characters in character
     *            number order. These objects will be updated with key state
     *            boundaries as appropriate.
     */
    private static void readRealCharacterKeyStateBoundaries(ItemsFileHeader itemFileHeader, BinFile itemBinFile,
            List<Character> characters) {
        final int characterCount = itemFileHeader.getNChar();
        final int indexRecord = itemFileHeader.getRpNkbd();

        // A record pointer of zero means no key state boundaries are
        // supplied.
        if (indexRecord == 0) {
            return;
        }

        // The index holds one record number per character; it is only
        // followed for real characters.
        seekToRecord(itemBinFile, indexRecord);
        List<Integer> boundaryRecordByCharacter = readIntegerList(itemBinFile, characterCount);

        for (int idx = 0; idx < characterCount; idx++) {
            Character candidate = characters.get(idx);
            if (!(candidate instanceof RealCharacter)) {
                continue;
            }

            // The character's record holds the number of boundaries; the
            // boundary values themselves start in the following record.
            int boundariesRecord = boundaryRecordByCharacter.get(idx);
            seekToRecord(itemBinFile, boundariesRecord);
            int boundaryCount = itemBinFile.readInt();
            seekToRecord(itemBinFile, boundariesRecord + 1);

            List<Float> boundaries = readFloatList(itemBinFile, boundaryCount);
            ((RealCharacter) candidate).setKeyStateBoundaries(boundaries);
        }
    }

    /**
     * Determine whether delta output is permitted for the dataset. A checksum
     * is supplied in the items file. If this checksum is non-zero and matches
     * the sum of the integers used to specify the character types, delta
     * output is enabled. Otherwise delta output is disabled.
     */
    private static void readEnableDeltaOutput(int calculatedChecksum, ItemsFileHeader itemFileHeader,
            IntkeyDataset ds) {
        int storedChecksum = itemFileHeader.getEnableDeltaOutput();

        // Delta output is permitted only when the file carries a non-zero
        // checksum and that checksum equals the one calculated by the caller.
        boolean permitted = storedChecksum != 0 && storedChecksum == calculatedChecksum;

        ds.setDeltaOutputPermitted(permitted);
    }

    /**
     * Read links to files to list for taxa in the intkey taxon information
     * dialog.
     * 
     * @param itemFileHeader
     *            Items (taxa) file header
     * @param itemBinFile
     *            Items (taxa) file
     * @param taxa
     *            The list of object representations of taxa in taxon number
     *            order. These objects will be updated with their link files
     *            as appropriate.
     */
    private static void readTaxonLinksFiles(ItemsFileHeader itemFileHeader, BinFile itemBinFile, List<Item> taxa) {
        final int taxonCount = itemFileHeader.getNItem();
        final int[] linkRecords = itemFileHeader.getRpTlinks();

        // Link data from the TAXON LINKS Confor directive.
        List<String> taxonLinksData = null;
        if (linkRecords[0] != 0) {
            taxonLinksData = readStringList(itemBinFile, linkRecords[0], taxonCount);
        }

        // Link data from the SUBJECT FOR OUTPUT FILES Confor directive.
        List<String> outputSubjectData = null;
        if (linkRecords[1] != 0) {
            outputSubjectData = readStringList(itemBinFile, linkRecords[1], taxonCount);
        }

        for (int idx = 0; idx < taxonCount; idx++) {
            Item taxon = taxa.get(idx);
            List<Pair<String, String>> links = new ArrayList<Pair<String, String>>();

            // Links from SUBJECT FOR OUTPUT FILES (there should only be
            // one) go in the list first, followed by the TAXON LINKS ones.
            appendParsedTaxonLinks(outputSubjectData, idx, links);
            appendParsedTaxonLinks(taxonLinksData, idx, links);

            taxon.setLinkFiles(links);
        }
    }

    /**
     * Parse the link data stored for one taxon and append the resulting
     * (file name, subject) pairs to the supplied list. Nothing is appended if
     * no link data was read, or if there is no entry for the taxon.
     * 
     * @param linksData
     *            Link data strings indexed by taxon, or null if none was read
     * @param taxonIndex
     *            Zero based index of the taxon
     * @param destination
     *            The list that the parsed links are appended to
     */
    private static void appendParsedTaxonLinks(List<String> linksData, int taxonIndex,
            List<Pair<String, String>> destination) {
        if (linksData == null) {
            return;
        }

        String packedLinks = linksData.get(taxonIndex);
        if (packedLinks == null) {
            return;
        }

        for (Pair<String, String> parsed : parseFileData(packedLinks)) {
            String fileName = parsed.getFirst();
            // Strip the angle brackets and the "@subject" marker from the
            // subject text.
            String subject = parsed.getSecond().replace("<", "").replace(">", "").replace("@subject", "").trim();
            destination.add(new Pair<String, String>(fileName, subject));
        }
    }

    /**
     * Read all attributes for the specified character and taxa. Attribute data
     * is read off disk on demand because it is often too large to store in
     * memory.
     * 
     * @param itemFileHeader
     *            Items (taxa) file header data
     * @param itemBinFile
     *            Items (taxa) file
     * @param ch
     *            The character to read attributes for
     * @param taxa
     *            The taxa to read attributes for
     * @return A list of attributes in the order of the list of taxa supplied to
     *         the method
     */
    public static List<Attribute> readAllAttributesForCharacter(ItemsFileHeader itemFileHeader, BinFile itemBinFile,
            Character ch, List<Item> taxa) {
        // Public entry point; the shared attribute reader does the work.
        List<Attribute> attributesForTaxa = readAttributes(itemFileHeader, itemBinFile, ch, taxa);
        return attributesForTaxa;
    }

    /**
     * Read the attribute for a character/taxon pair. Attribute data is read off
     * disk on demand because it is often too large to store in memory.
     * 
     * @param itemFileHeader
     *            Items (taxa) file header data
     * @param itemBinFile
     *            Items (taxa) file
     * @param ch
     *            The character to read the attribute for
     * @param taxon
     *            The taxon to read the attribute for
     * @return The attribute data for the character/taxon pair.
     */
    public static Attribute readAttribute(ItemsFileHeader itemFileHeader, BinFile itemBinFile, Character ch,
            Item taxon) {
        // Wrap the taxon in a one-element list so the shared attribute
        // reader can be used, then unwrap its single result.
        List<Item> singleTaxon = new ArrayList<Item>(1);
        singleTaxon.add(taxon);

        return readAttributes(itemFileHeader, itemBinFile, ch, singleTaxon).get(0);
    }

    /**
     * Read attributes from the items file.
     * <p>
     * The record holding the per-character data pointers is read first, then
     * the data block for the requested character is decoded according to the
     * character's type (multistate, integer, real or text). The data for all
     * taxa in the dataset is read, and the attributes for the requested taxa
     * are then extracted from it.
     * 
     * @param itemFileHeader
     *            item file header
     * @param itemBinFile
     *            item file data
     * @param c
     *            character that we want attributes for
     * @param taxa
     *            taxa that we want attributes for
     * @return a list of attributes for the supplied character and taxa, in the
     *         order of the supplied taxa. The list is empty if the character
     *         is not a multistate, integer, real or text character.
     */
    private static List<Attribute> readAttributes(ItemsFileHeader itemFileHeader, BinFile itemBinFile, Character c,
            List<Item> taxa) {
        List<Attribute> retList = new ArrayList<Attribute>();

        int totalNumChars = itemFileHeader.getNChar();
        int totalNumTaxa = itemFileHeader.getNItem();

        seekToRecord(itemBinFile, itemFileHeader.getRpCdat());
        List<Integer> charAttributeDataRecordIndicies = readIntegerList(itemBinFile, totalNumChars);

        // Subtract 1 from the character number because the list of record
        // indices read above is zero indexed.
        int charNo = c.getCharacterId();
        int charTaxonDataRecordIndex = charAttributeDataRecordIndicies.get(charNo - 1);

        seekToRecord(itemBinFile, charTaxonDataRecordIndex);

        if (c instanceof MultiStateCharacter) {

            MultiStateCharacter multiStateChar = (MultiStateCharacter) c;

            // One bit per state plus a trailing inapplicability bit.
            int bitsPerTaxon = multiStateChar.getStates().length + 1;
            int totalBitsNeeded = bitsPerTaxon * totalNumTaxa;
            int bytesToRead = (int) Math.ceil(totalBitsNeeded / (double) Byte.SIZE);

            byte[] bytes = new byte[bytesToRead];
            itemBinFile.readBytes(bytes);
            boolean[] taxaData = Utils.byteArrayToBooleanArray(bytes);

            for (Item t : taxa) {
                // Taxa numbers are 1 indexed instead of 0 indexed
                int startIndex = (t.getItemNumber() - 1) * bitsPerTaxon;
                int endIndex = startIndex + bitsPerTaxon;

                boolean[] taxonData = Arrays.copyOfRange(taxaData, startIndex, endIndex);

                // Taxon data consists of a bit for each state, indicating
                // the states presence, followed by
                // a final bit signifying whether or not the character is
                // inapplicable for the taxon.
                boolean inapplicable = taxonData[taxonData.length - 1];

                HashSet<Integer> presentStates = new HashSet<Integer>();
                for (int k = 0; k < taxonData.length - 1; k++) {
                    boolean statePresent = taxonData[k];
                    if (statePresent) {
                        // State numbers are 1 indexed
                        presentStates.add(k + 1);
                    }
                }

                // The attribute is treated as unknown when no state is present
                SimpleAttributeData attrData = new SimpleAttributeData(presentStates.isEmpty(), inapplicable);
                MultiStateAttribute msAttr = new MultiStateAttribute(multiStateChar, attrData);
                msAttr.setItem(t);

                msAttr.setPresentStates(presentStates);

                retList.add(msAttr);
            }

        } else if (c instanceof IntegerCharacter) {
            IntegerCharacter intChar = (IntegerCharacter) c;
            int charMinValue = intChar.getMinimumValue();
            int charMaxValue = intChar.getMaximumValue();

            // 1 bit for all values below minimum, 1 bit for each value between
            // minimum and maximum (inclusive),
            // 1 bit for all values above maximum, 1 inapplicability bit.
            int bitsPerTaxon = charMaxValue - charMinValue + 4;
            int totalBitsNeeded = bitsPerTaxon * totalNumTaxa;

            int bytesToRead = (int) Math.ceil(totalBitsNeeded / (double) Byte.SIZE);

            byte[] bytes = new byte[bytesToRead];
            itemBinFile.readBytes(bytes);
            boolean[] taxaData = Utils.byteArrayToBooleanArray(bytes);

            for (Item t : taxa) {
                // Taxa numbers are 1 indexed instead of 0 indexed
                int startIndex = (t.getItemNumber() - 1) * bitsPerTaxon;
                int endIndex = startIndex + bitsPerTaxon;

                boolean[] taxonData = Arrays.copyOfRange(taxaData, startIndex, endIndex);

                boolean inapplicable = taxonData[taxonData.length - 1];

                Set<Integer> presentValues = new HashSet<Integer>();
                for (int k = 0; k < taxonData.length - 1; k++) {
                    boolean present = taxonData[k];
                    if (present) {
                        // Bit 0 stands for "below minimum", so bit k maps to
                        // the value (minimum - 1) + k.
                        presentValues.add(k + charMinValue - 1);
                    }
                }

                IntegerAttribute intAttr = new IntegerAttribute(intChar,
                        new SimpleAttributeData(presentValues.isEmpty(), inapplicable));
                intAttr.setItem(t);
                intAttr.setPresentValues(presentValues);

                retList.add(intAttr);
            }

        } else if (c instanceof RealCharacter) {
            // Read NI inapplicability bits
            int bytesToRead = (int) Math.ceil(totalNumTaxa / (double) Byte.SIZE);
            byte[] bytes = new byte[bytesToRead];
            itemBinFile.readBytes(bytes);
            boolean[] taxaInapplicabilityData = Utils.byteArrayToBooleanArray(bytes);

            // The float data starts on the first record after the
            // inapplicability bits.
            int recordsSpannedByInapplicabilityData = recordsSpannedByBytes(bytesToRead);

            seekToRecord(itemBinFile, charTaxonDataRecordIndex + recordsSpannedByInapplicabilityData);

            // Read two float values per taxon
            List<Float> taxonData = readFloatList(itemBinFile, totalNumTaxa * 2);

            for (Item t : taxa) {
                int taxonNumber = t.getItemNumber();

                float lowerFloat = taxonData.get((taxonNumber - 1) * 2);
                float upperFloat = taxonData.get(((taxonNumber - 1) * 2) + 1);

                boolean inapplicable = taxaInapplicabilityData[taxonNumber - 1];

                // Character is unknown for the corresponding taxon if
                // lowerfloat > upperfloat
                boolean unknown = lowerFloat > upperFloat;

                RealAttribute realAttr = new RealAttribute((RealCharacter) c,
                        new SimpleAttributeData(unknown, inapplicable));

                if (!unknown) {
                    FloatRange range = new FloatRange(lowerFloat, upperFloat);
                    realAttr.setPresentRange(range);
                }
                realAttr.setItem(t);

                retList.add(realAttr);
            }

        } else if (c instanceof TextCharacter) {
            TextCharacter textChar = (TextCharacter) c;

            // Read NI inapplicability bits
            int bytesToRead = (int) Math.ceil(totalNumTaxa / (double) Byte.SIZE);
            byte[] bytes = new byte[bytesToRead];
            itemBinFile.readBytes(bytes);
            boolean[] taxaInapplicabilityData = Utils.byteArrayToBooleanArray(bytes);

            int recordsSpannedByInapplicabilityData = recordsSpannedByBytes(bytesToRead);

            seekToRecord(itemBinFile, charTaxonDataRecordIndex + recordsSpannedByInapplicabilityData);

            // NI + 1 offsets: the text for taxon i lies between offset i and
            // offset i + 1.
            List<Integer> taxonTextDataOffsets = readIntegerList(itemBinFile, totalNumTaxa + 1);

            int recordsSpannedByOffsets = recordsSpannedByBytes((totalNumTaxa + 1) * Constants.SIZE_INT_IN_BYTES);

            seekToRecord(itemBinFile,
                    charTaxonDataRecordIndex + recordsSpannedByInapplicabilityData + recordsSpannedByOffsets);

            // The offsets are used below as 1 indexed positions into the text
            // data (position(lowerOffset - 1)), so the last offset minus one
            // is the number of bytes needed to cover the text of every taxon.
            // The last offset is looked up explicitly rather than deriving
            // its index from the value of the first offset.
            int lastTextDataOffset = taxonTextDataOffsets.get(taxonTextDataOffsets.size() - 1);
            ByteBuffer taxonTextData = itemBinFile.readByteBuffer(lastTextDataOffset - 1);

            for (Item t : taxa) {
                int taxonNumber = t.getItemNumber();

                int lowerOffset = taxonTextDataOffsets.get(taxonNumber - 1);
                int upperOffset = taxonTextDataOffsets.get((taxonNumber - 1) + 1);
                int textLength = upperOffset - lowerOffset;

                String txt = "";
                if (textLength > 0) {
                    byte[] textBytes = new byte[textLength];
                    taxonTextData.position(lowerOffset - 1);
                    taxonTextData.get(textBytes);

                    txt = BinFileEncoding.decode(textBytes);
                }

                boolean inapplicable = taxaInapplicabilityData[taxonNumber - 1];
                boolean unknown = StringUtils.isEmpty(txt);

                TextAttribute txtAttr = new TextAttribute(textChar, new SimpleAttributeData(unknown, inapplicable));
                try {
                    txtAttr.setText(txt);
                } catch (DirectiveException ignored) {
                    // The SimpleAttributeData implementation won't throw this
                    // Exception.
                }
                txtAttr.setItem(t);

                retList.add(txtAttr);

            }
        }

        return retList;
    }

    // --------------- UTILITY METHODS
    // --------------------------------------------------------

    /**
     * Move the file pointer of the supplied binary file to the start of the
     * specified record.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param recordNumber
     *            The record to seek to. Note that records are 1 indexed.
     */
    private static void seekToRecord(BinFile bFile, int recordNumber) {
        // Records are 1 indexed, byte positions in the file are 0 indexed.
        int recordsToSkip = recordNumber - 1;
        bFile.seek(recordsToSkip * Constants.RECORD_LENGTH_INTEGERS * Constants.SIZE_INT_IN_BYTES);
    }

    /**
     * Read one whole record from the supplied binary file.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param recordNumber
     *            The record to read. Note that records are 1 indexed.
     * @return a buffer holding the bytes of the specified record.
     */
    private static ByteBuffer readRecord(BinFile bFile, int recordNumber) {
        seekToRecord(bFile, recordNumber);

        // A record is RECORD_LENGTH_INTEGERS integers long.
        ByteBuffer recordContent = bFile
                .readByteBuffer(Constants.RECORD_LENGTH_INTEGERS * Constants.SIZE_INT_IN_BYTES);
        return recordContent;
    }

    /**
     * Read a string starting at the current pointer location of the supplied
     * binary file.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param numBytes
     *            the number of bytes to read from the file
     * @return the bytes read, decoded using BinFileEncoding
     */
    private static String readString(BinFile bFile, int numBytes) {
        return BinFileEncoding.decode(bFile.read(numBytes));
    }

    /**
     * Helper method for a pattern that is common in intkey data files: one
     * record starts with a single integer giving the length of a string in
     * bytes, and the record that follows it contains the text of the string.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param recordNumber
     *            The record containing the length of a string which is stored
     *            in the following record. Note that records are 1 indexed.
     * @return the string read from the record following the specified record.
     */
    private static String readReferencedString(BinFile bFile, int recordNumber) {
        // The referenced record holds the length of the string...
        seekToRecord(bFile, recordNumber);
        int lengthInBytes = bFile.readInt();

        // ...and the text itself starts at the beginning of the next record.
        int textRecordNumber = recordNumber + 1;
        seekToRecord(bFile, textRecordNumber);
        return readString(bFile, lengthInBytes);
    }

    /**
     * Helper method for a pattern that is common in intkey data files: a
     * record contains N integer values, each of which, if non-zero, is the
     * number of a record from which a string can be read using
     * readReferencedString.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param recordNumber
     *            The record containing the string references. Note that
     *            records are 1 indexed.
     * @param listSize
     *            The number of string references to read from the record
     * 
     * @return the list of strings, in the order of their references. The list
     *         contains null at each position whose reference is zero.
     */
    private static List<String> readStringList(BinFile bFile, int recordNumber, int listSize) {
        seekToRecord(bFile, recordNumber);
        List<Integer> recordPointers = readIntegerList(bFile, listSize);

        List<String> strings = new ArrayList<String>();
        for (int recordPointer : recordPointers) {
            // A zero reference means there is no string for this position.
            String value = (recordPointer == 0) ? null : readReferencedString(bFile, recordPointer);
            strings.add(value);
        }

        return strings;
    }

    /**
     * Read consecutive integers starting at the current pointer location of
     * the supplied binary file.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param numInts
     *            the number of integers to read
     * @return the integers, in the order in which they were read
     */
    private static List<Integer> readIntegerList(BinFile bFile, int numInts) {
        int byteCount = numInts * Constants.SIZE_INT_IN_BYTES;
        ByteBuffer buffer = bFile.readByteBuffer(byteCount);

        List<Integer> values = new ArrayList<Integer>();
        int remaining = numInts;
        while (remaining > 0) {
            values.add(buffer.getInt());
            remaining--;
        }
        return values;
    }

    /**
     * Read consecutive floating-point values starting at the current pointer
     * location of the supplied binary file.
     * 
     * @param bFile
     *            the binary (characters or taxa) file
     * @param numFloats
     *            the number of floating point values to read
     * @return the floating point values, in the order in which they were read
     */
    private static List<Float> readFloatList(BinFile bFile, int numFloats) {
        // The byte count is computed with the integer size constant, exactly
        // as for integer lists.
        int byteCount = numFloats * Constants.SIZE_INT_IN_BYTES;
        ByteBuffer buffer = bFile.readByteBuffer(byteCount);

        List<Float> values = new ArrayList<Float>();
        int remaining = numFloats;
        while (remaining > 0) {
            values.add(buffer.getFloat());
            remaining--;
        }
        return values;
    }

    /**
     * Work out how many records are needed to hold the supplied number of
     * bytes.
     * 
     * @param numBytes
     *            The number of bytes
     * @return The number of records spanned by the supplied number of bytes,
     *         rounded up.
     */
    private static int recordsSpannedByBytes(int numBytes) {
        // Divide as doubles so that a partially filled record is kept as a
        // fraction, which is then rounded up to a whole record.
        double fractionalRecords = numBytes / (double) Constants.RECORD_LENGTH_BYTES;
        return (int) Math.ceil(fractionalRecords);
    }

    /**
     * Parse a string containing file names and file information in the format
     * "filename &lt;file information&gt; filename &lt;file information&gt; ...",
     * where the file information is optional, and return a list of file name
     * and file information pairs.
     * 
     * @param fileData
     *            file data string in the format described above
     * @return A list of file name, file information pairs. If no file
     *         information is supplied for an item, this value will be null in
     *         the pair.
     */
    public static List<Pair<String, String>> parseFileData(String fileData) {
        List<Pair<String, String>> nameInfoPairs = new ArrayList<Pair<String, String>>();

        for (String entry : separateFileDataStrings(fileData)) {
            // Everything from the first '<' onwards is file information, the
            // text before it is the file name.
            int infoStart = entry.indexOf('<');

            if (infoStart < 0) {
                // No file information supplied; the entry is used as the file
                // name as is.
                nameInfoPairs.add(new Pair<String, String>(entry, null));
            } else {
                String name = entry.substring(0, infoStart).trim();
                String info = entry.substring(infoStart).trim();
                nameInfoPairs.add(new Pair<String, String>(name, info));
            }
        }

        return nameInfoPairs;
    }

    /**
     * Take a string with the format
     * "filename &lt;file information&gt; &lt;more file information&gt; filename ..."
     * and split it into one string per file. Each returned string consists of
     * a file name together with any bracketed file information that follows
     * it.
     * <p>
     * The input is split on single spaces. A token that lies outside of angle
     * brackets and is not the first token starts a new entry.
     * 
     * @param filesData
     *            file information string in the format described above
     * @return a list containing one "filename &lt;file information&gt; ..."
     *         string for each file found in the input
     */
    public static List<String> separateFileDataStrings(String filesData) {
        String[] words = filesData.split(" ");
        List<String> entries = new ArrayList<String>();

        // Index of the first word of the entry currently being collected
        int entryStart = 0;
        boolean insideBrackets = false;

        for (int idx = 0; idx < words.length; idx++) {
            String word = words[idx];
            boolean closesBracket = word.endsWith(">");

            if (word.startsWith("<") && !closesBracket) {
                insideBrackets = true;
            } else if (closesBracket) {
                insideBrackets = false;
            } else if (idx > 0 && !insideBrackets) {
                // A plain word outside of brackets begins the next entry, so
                // everything collected before it forms a finished entry.
                String[] finishedEntry = Arrays.copyOfRange(words, entryStart, idx);
                entries.add(StringUtils.join(finishedEntry, " "));
                entryStart = idx;
            }
        }

        // Whatever remains after the last word has been examined is the final
        // entry.
        if (words.length > 0) {
            String[] finalEntry = Arrays.copyOfRange(words, entryStart, words.length);
            entries.add(StringUtils.join(finalEntry, " "));
        }

        return entries;
    }

    /**
     * Create an object representation of an intkey dataset image.
     * 
     * @param fileName
     *            File name for the image
     * @param comments
     *            comments for the image. If not null, these are parsed as
     *            image overlay data and the resulting overlays are attached
     *            to the image data.
     * @param imageType
     *            The image type, as defined in
     *            au.org.ala.delta.model.image.ImageType
     * @return The object representation of the dataset image
     * @throws RuntimeException
     *             if the overlay data cannot be parsed. The underlying
     *             exception is available as the cause.
     */
    private static Image createImage(String fileName, String comments, int imageType) {
        DefaultImageData imageData = new DefaultImageData(fileName);
        Image image = new Image(imageData);
        try {
            if (comments != null) {
                ImageOverlayParser parser = new ImageOverlayParser();
                parser.setColorsBGR(true);
                List<ImageOverlay> overlayList = parser.parseOverlays(comments, imageType);
                imageData.setOverlays(overlayList);
            }
            return image;
        } catch (Exception ex) {
            // Chain the original exception so that the reason for the parse
            // failure is not lost.
            throw new RuntimeException("Error parsing image overlay data", ex);
        }
    }
}