Java tutorial
/*
 *
 * Copyright 2015 University of Michigan
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @author myersjd@umich.edu
 */

package org.sead.repositories.reference;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.input.CountingInputStream;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.json.JSONObject;
import org.sead.nds.repository.BagGenerator;
import org.sead.nds.repository.C3PRPubRequestFacade;
import org.sead.nds.repository.PubRequestFacade;
import org.sead.nds.repository.Repository;
import org.sead.repositories.reference.util.RefLocalContentProvider;
import org.sead.repositories.reference.util.ReferenceLinkRewriter;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.MappingJsonFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;

/**
 * RefRepository generates new data publications and manages the RESTful
 * interface to the published data packages stored as Zip files. It generates
 * the landing page for a given DOI and, based on the landing URL, finds the
 * corresponding zip and extracts the relevant data/metadata. To help with
 * this, it extracts the oremap and a short description file that includes
 * just the top-level description and top-level children from the oremap, and
 * an index defining the offsets, within the oremap file, of the json
 * description for each AggregatedResource (e.g. a collection or dataset (1.5)
 * or a Dataset/File (2.0)).
 */
@Path("/")
public class RefRepository extends Repository {

    private static final Logger log = Logger.getLogger(RefRepository.class);

    static ObjectMapper mapper = new ObjectMapper();

    public RefRepository() {
    }

    /*
     * Backward compatibility - Tomcat 6 seems to require that Repository be
     * initialized in this class even though it is initialized in the
     * RepoContextListener class in the same app. It does not appear that this
     * is needed in Tomcat 7 ...
     */
    static {
        Repository.init(loadProperties());
    }

    private static String roId = null;
    private static String localRequest = null;
    private static String localContentSource = null;
    private static boolean validateOnly = false;

    public static void main(String[] args) {
        PropertyConfigurator.configure("./log4j.properties");
        init(loadProperties());
        if (args.length == 0) {
            printUsage();
        }
        roId = args[0];
        if (args.length > 1) {
            int i = 1;
            while (i < args.length) {
                if (args[i].startsWith("-")) {
                    char flag = args[i].charAt(1);
                    switch (flag) {
                    case 'l':
                        localRequest = args[i + 1];
                        System.out.println("Local Pub Request: " + localRequest);
                        i++;
                        break;
                    case 'r':
                        localContentSource = args[i + 1];
                        System.out.println("LocalContentSource: " + localContentSource);
                        i++;
                        break;
                    case 'v':
                        validateOnly = true;
                        break;
                    default:
                        printUsage();
                        break;
                    }
                }
                i += 1;
            }
        }
        /*
         * At this point we have an RO ID and possibly a local pub request and
         * possibly a local content source.
         */
        C3PRPubRequestFacade RO = new RefRepoLocalPubRequestFacade(roId, localRequest, getProps());
        BagGenerator bg = new BagGenerator(RO);
        if (validateOnly) {
            bg.validateBag(roId);
            log.info("Validation Complete.");
            System.exit(0);
        }
        // Request human approval if needed - will send a fail status and
        // exit if the request is denied
        localContentSource = handleRepub(RO, bg, localContentSource);
        bg.setLinkRewriter(new ReferenceLinkRewriter(getProps().getProperty("repo.landing.base")));
        // FixMe - use repo.ID from properties file (possibly in repo class)
        // If using local content and it has the same RO ID as the new pub
        // (just reprocessing an existing RO), make the BagGenerator use a temp
        // file (and not overwrite the local RO with an empty version at the
        // start).
        boolean useTemp = (localContentSource != null) && (localContentSource.equals(roId));
        if (bg.generateBag(roId, useTemp)) {
            RO.sendStatus(PubRequestFacade.SUCCESS_STAGE,
                    RO.getOREMap().getJSONObject("describes").getString("External Identifier"));
            System.out.println(
                    "Publication was successful. New publication is in: " + RefRepository.getDataPathTo(roId));
            if (localContentSource != null) {
                System.out.println("New Publication was intended to replace " + localContentSource);
                System.out.println("Old publication is in " + RefRepository.getDataPathTo(localContentSource)
                        + " and could now be deleted.");
            }
        } else {
            RO.sendStatus(PubRequestFacade.FAILURE_STAGE,
                    "Processing of this request has failed. Further attempts to process this request may or may not be made. Please contact the repository for further information.");
        }
        System.exit(0);
    }

    private static void printUsage() {
        System.out.println("Could not parse request: No processing will occur.");
        System.out.println(
                "Usage: <RO Identifier> <-l <optional local pubRequest file (path to JSON document)>> <-r <local Content Source RO ID>> <-v>");
        System.out.println(
                "-v - validateOnly - assumes a zip file for this RO ID exists and will attempt to validate the stored files w.r.t. the hash values in the oremap.");
        System.out.println(
                "Note: RO identifier is always sent and must match the identifier in any local pub Request file used.");
        System.out.println("Note: A local content source will override info sent as an alternateOf Preference.");
        System.exit(0);
    }
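    /*
     * A hypothetical command-line invocation, for orientation (the RO id and
     * file names below are illustrative only; the flags are those parsed in
     * main() above):
     *
     *   java org.sead.repositories.reference.RefRepository \
     *       "tag:example,2015:ro1" -l ./pubRequest.json -r "tag:example,2015:ro0"
     *
     * This would publish tag:example,2015:ro1 using the local pub request in
     * ./pubRequest.json, reusing local content from the previously published
     * tag:example,2015:ro0. Adding -v instead would only validate an existing
     * zip for the RO id against the hashes in its oremap.
     */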
    private static String handleRepub(C3PRPubRequestFacade RO, BagGenerator bg, String localSource) {
        JSONObject request = RO.getPublicationRequest();
        JSONObject prefs = request.getJSONObject("Preferences");
        Scanner input = new Scanner(System.in);
        if (prefs.has("External Identifier")) {
            String extIdPref = prefs.getString("External Identifier");
            System.out.println("This publication is intended to replace " + extIdPref);
            if (!((String) getProps().get("repo.allowupdates")).equalsIgnoreCase("true")) {
                System.out.println("NOTE: Since updates are not allowed, a new DOI will be generated.");
            }
            System.out.println("Proceed (Y/N)?: ");
            if (!input.next().equalsIgnoreCase("y")) {
                input.close();
                RO.sendStatus(PubRequestFacade.FAILURE_STAGE,
                        "This request has been denied as a replacement for the existing publication: " + extIdPref
                                + ". Please contact the repository for further information.");
                System.exit(0);
            }
        }
        if (localSource == null && prefs.has("alternateOf")) {
            // Add a LocalContent class
            localSource = prefs.getString("alternateOf");
            System.out.println("Setting local content source to alternateOf value: " + localSource);
        }
        if (localSource != null) {
            System.out.println("Looking at: " + localSource + " for local content.");
            log.info("Looking at: " + localSource + " for local content.");
            RefLocalContentProvider ref = new RefLocalContentProvider(localSource, Repository.getProps());
            if (ref.getHashType() != null) {
                bg.setLocalContentProvider(ref);
                System.out.println("Proceeding with: " + localSource + " for local content.");
            } else {
                System.out.println("Original RO not found/has no usable hash entries: " + getDataPathTo(localSource));
                System.out.println("Proceed (using remote content)? (Y/N): ");
                if (!input.next().equalsIgnoreCase("y")) {
                    input.close();
                    RO.sendStatus(PubRequestFacade.FAILURE_STAGE,
                            "This request won't be processed due to a problem in finding local data copies: "
                                    + localSource + ". Please contact the repository for further information.");
                    System.exit(0);
                }
                localSource = null;
            }
        }
        input.close();
        return localSource;
    }

    /*
     * @Path("/repository")
     *
     * Returns the repository profile (JSON), proxied from the c3pr services.
     */
    @Path("/repository")
    @Produces(MediaType.APPLICATION_JSON)
    @GET
    public Response getRepositoryInfo() {
        String id = Repository.getID();
        String SEADServicesURL = Repository.getC3PRAddress();
        URL repoInfo = null;
        try {
            repoInfo = new URL(SEADServicesURL + "api/repositories/" + URLEncoder.encode(id, "UTF-8"));
            // Connect to the server
            log.debug("Connecting to: " + repoInfo.toString());
            HttpURLConnection conn = (HttpURLConnection) repoInfo.openConnection();
            conn.setDoInput(true);
            conn.setUseCaches(false);
            InputStream is = conn.getInputStream();
            return Response.ok(is).build();
        } catch (MalformedURLException e) {
            log.error("Bad Repo URL");
        } catch (IOException e) {
            log.warn("Could not contact c3pr: " + ((repoInfo != null) ? repoInfo.toString() : SEADServicesURL));
        }
        log.debug("Unable to refer to repository info @ c3pr");
        return Response.status(com.sun.jersey.api.client.ClientResponse.Status.INTERNAL_SERVER_ERROR).build();
    }

    /*
     * @Path("/researchobjects/{id}")
     *
     * Returns the base landing page html
     */
    @Path("/researchobjects/{id}")
    @Produces(MediaType.TEXT_HTML)
    @GET
    public Response getLandingPage(@PathParam(value = "id") String id) {
        URI landingPage = null;
        try {
            landingPage = new URI("../landing.html#" + URLEncoder.encode(id, "UTF-8"));
        } catch (URISyntaxException e) {
            log.warn(e.getMessage() + " id: " + id);
        } catch (UnsupportedEncodingException e) {
            log.warn("UTF-8 not supported");
        }
        if (landingPage == null) {
            return Response.serverError().build();
        }
        log.debug("Referring to : " + landingPage.toString());
        // Fairly permanent, but using temporary to keep the permanent html and
        // json URLs for the RO the same...
        return Response.temporaryRedirect(landingPage).build();
    }
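    /*
     * A sketch of the resulting request flow (host and id are hypothetical):
     *
     *   GET /researchobjects/tag%3Aexample%2C2015%3Aro1   (Accept: text/html)
     *     -> 307 Temporary Redirect to ../landing.html#tag%3Aexample%2C2015%3Aro1
     *
     * The landing page then calls the JSON variants of the same URL to fill
     * in its content, which is what keeps the html and json URLs for an RO
     * identical.
     */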
    /*
     * @Path("/researchobjects/{id}")
     *
     * Returns the description file for the Aggregation
     *
     * /researchobjects/{id}/metadata returns this plus the top level of
     * children
     */
    @Path("/researchobjects/{id}")
    @Produces(MediaType.APPLICATION_JSON)
    @GET
    public Response getAggregationSummary(@PathParam(value = "id") String id) {
        File descFile;
        try {
            // get or generate this file
            descFile = getDescFile(id);
        } catch (Exception e1) {
            log.error(e1.getLocalizedMessage(), e1);
            return Response.serverError().build();
        }
        log.debug("Ready to send desc file");
        try {
            final FileInputStream fis = new FileInputStream(descFile);
            StreamingOutput stream = new StreamingOutput() {
                public void write(OutputStream os) throws IOException, WebApplicationException {
                    IOUtils.copy(fis, os);
                    fis.close();
                }
            };
            return Response.ok(stream).build();
        } catch (IOException e) {
            e.printStackTrace();
            return Response.serverError().build();
        }
    }

    /*
     * @Path("/researchobjects/{id}/metadata")
     *
     * Returns the description for the Aggregation (the Aggregation metadata
     * and the descriptions of the AggregatedResources at the top level
     * (direct children listed in 'Has Part'))
     */
    @Path("/researchobjects/{id}/metadata")
    @Produces(MediaType.APPLICATION_JSON)
    @GET
    public Response getResourceMetadata(@PathParam(value = "id") String id) {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File result = new File(path, bagNameRoot + ".zip");
        if (!result.exists()) {
            return Response.status(Status.NOT_FOUND).build();
        }
        log.debug(result.getAbsolutePath());
        CountingInputStream cis = null;
        try {
            // Check for index files
            File indexFile = getIndexFile(id);
            // Find/open base ORE map file
            // Note - limited to maxint size for oremap file size
            File oremap = getOREMapFile(id);
            cis = new CountingInputStream(
                    new BufferedInputStream(new FileInputStream(oremap), Math.min((int) oremap.length(), 1000000)));
            JsonNode resultNode = getAggregation(id, indexFile, cis, true, oremap.length());
            if (resultNode == null) {
                log.warn("Null item returned");
                return Response.serverError().build();
            }
            return Response.ok(resultNode.toString()).build();
        } catch (JsonParseException e) {
            log.error(e);
            e.printStackTrace();
            return Response.serverError().entity(e.getMessage()).build();
        } catch (IOException e) {
            log.error(e);
            e.printStackTrace();
            return Response.serverError().entity(e.getMessage()).build();
        } finally {
            IOUtils.closeQuietly(cis);
        }
    }

    private File getOREMapFile(String id) {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File map = new File(path, bagNameRoot + ".oremap.jsonld.txt");
        if (!map.exists()) {
            createMap(map, path, bagNameRoot);
        }
        return map;
    }

    protected static void createMap(File map, String path, String bagNameRoot) {
        ZipFile zf = null;
        try {
            log.info("Caching oremap: " + map.getPath());
            // Note: This step can be VERY slow when something is being
            // published on the same disk - minutes for a large file.
            // If you don't see the "Zipfile opened" message in the log,
            // look at disk I/O...
            File result = new File(path, bagNameRoot + ".zip");
            zf = new ZipFile(result);
            log.debug("Zipfile opened");
            ZipEntry archiveEntry1 = zf.getEntry(bagNameRoot + "/oremap.jsonld.txt");
            InputStream source = zf.getInputStream(archiveEntry1);
            OutputStream sink = new FileOutputStream(map);
            IOUtils.copy(source, sink);
            IOUtils.closeQuietly(source);
            IOUtils.closeQuietly(sink);
            log.debug("ORE Map written: " + result.getCanonicalPath());
        } catch (Exception e) {
            log.error("Cannot read zipfile to create cached oremap: " + map.getPath(), e);
            e.printStackTrace();
        } finally {
            IOUtils.closeQuietly(zf);
        }
    }
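    /*
     * A minimal standalone sketch of the single-entry extraction pattern that
     * createMap uses (the path and entry names here are illustrative):
     *
     *   try (ZipFile zf = new ZipFile(new File("/data/ab/12/myBag.zip"))) {
     *       ZipEntry entry = zf.getEntry("myBag/oremap.jsonld.txt");
     *       if (entry != null) {
     *           try (InputStream in = zf.getInputStream(entry);
     *                   OutputStream out = new FileOutputStream("/tmp/oremap.jsonld.txt")) {
     *               IOUtils.copy(in, out);
     *           }
     *       }
     *   }
     *
     * Only the one entry is streamed; the archive is never unpacked in full.
     */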
    /*
     * @Path("/researchobjects/{id}/metadata/{did}")
     *
     * Returns the description for the AggregatedResource within the {id}
     * Aggregation (the AggregatedResource metadata and the descriptions of
     * the AggregatedResources directly within it (direct children listed in
     * 'Has Part'))
     */
    @Path("/researchobjects/{id}/metadata/{did}")
    @Produces(MediaType.APPLICATION_JSON)
    @GET
    public Response getResourceSummary(@PathParam(value = "id") String id, @PathParam(value = "did") String dataID) {
        log.debug("Getting " + dataID + " from " + id);
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File result = new File(path, bagNameRoot + ".zip");
        if (!result.exists()) {
            return Response.status(Status.NOT_FOUND).build();
        }
        CountingInputStream cis = null;
        try {
            File indexFile = getIndexFile(id);
            // Find/open base ORE map file
            // Note - limited to maxint size for oremap file size
            File map = getOREMapFile(id);
            cis = new CountingInputStream(
                    new BufferedInputStream(new FileInputStream(map), Math.min((int) map.length(), 1000000)));
            JsonNode resultNode = getItem(dataID, indexFile, cis, true, map.length());
            if (resultNode == null) {
                log.warn("Null item returned");
                return Response.serverError().build();
            }
            return Response.ok(resultNode.toString()).build();
        } catch (JsonParseException e) {
            log.error(e);
            e.printStackTrace();
            return Response.serverError().entity(e.getMessage()).build();
        } catch (IOException e) {
            log.error(e);
            e.printStackTrace();
            return Response.serverError().entity(e.getMessage()).build();
        } finally {
            IOUtils.closeQuietly(cis);
        }
    }

    /*
     * @Path("/researchobjects/{id}/data/{relpath}")
     *
     * Returns the data file (any file within the /data directory) at the
     * given path within the {id} publication
     *
     * Note: The original version using the apache compress ZipFile class used
     * for generating the bags can be extremely slow when reading large files
     * (e.g. 20+ minutes for a 600GB file), even when all we do is extract one
     * file. The java.util.zip.ZipFile class seems to work normally (< a
     * second).
     */
    @Path("/researchobjects/{id}/data/{relpath}")
    @Produces(MediaType.APPLICATION_OCTET_STREAM)
    @GET
    public Response getDatafile(@PathParam(value = "id") String id, @PathParam(value = "relpath") String datapath) {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File result = new File(path, bagNameRoot + ".zip");
        StreamingOutput stream = null;
        try {
            final ZipFile zf = new ZipFile(result);
            ZipEntry archiveEntry1 = zf.getEntry(bagNameRoot + "/data/" + datapath);
            if (archiveEntry1 != null) {
                final InputStream inputStream = new BufferedInputStream(zf.getInputStream(archiveEntry1));
                stream = new StreamingOutput() {
                    public void write(OutputStream os) throws IOException, WebApplicationException {
                        IOUtils.copy(inputStream, os);
                        IOUtils.closeQuietly(os);
                        IOUtils.closeQuietly(inputStream);
                        IOUtils.closeQuietly(zf);
                    }
                };
            }
        } catch (IOException e) {
            log.error(e.getLocalizedMessage());
            e.printStackTrace();
        }
        if (stream == null) {
            return Response.serverError().build();
        }
        return Response.ok(stream).build();
    }

    /*
     * @Path("/researchobjects/{id}/meta/{relpath}")
     *
     * Returns the metadata file (a file not in the /data dir) at the given
     * path within the {id} publication
     */
    @Path("/researchobjects/{id}/meta/{relpath}")
    @Produces(MediaType.APPLICATION_OCTET_STREAM)
    @GET
    public Response getMetadatafile(@PathParam(value = "id") String id,
            @PathParam(value = "relpath") String metadatapath) {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File result = new File(path, bagNameRoot + ".zip");
        // Don't let this call be used to get data from the data dir
        if (metadatapath.startsWith("data") || metadatapath.startsWith("/data")) {
            return Response.status(Status.BAD_REQUEST).build();
        }
        StreamingOutput stream = null;
        try {
            final ZipFile zf = new ZipFile(result);
            ZipEntry archiveEntry1 = zf.getEntry(bagNameRoot + "/" + metadatapath);
            if (archiveEntry1 != null) {
                final InputStream inputStream = new BufferedInputStream(zf.getInputStream(archiveEntry1));
                stream = new StreamingOutput() {
                    public void write(OutputStream os) throws IOException, WebApplicationException {
                        IOUtils.copy(inputStream, os);
                        IOUtils.closeQuietly(inputStream);
                        IOUtils.closeQuietly(os);
                        IOUtils.closeQuietly(zf);
                    }
                };
            }
        } catch (IOException e) {
            log.error(e.getLocalizedMessage());
            e.printStackTrace();
        }
        if (stream == null) {
            return Response.serverError().build();
        }
        return Response.ok(stream).build();
    }

    @Path("/researchobjects/{id}/bag")
    @Produces(MediaType.APPLICATION_OCTET_STREAM)
    @GET
    public Response getBag(@PathParam(value = "id") String id) {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File result = new File(path, bagNameRoot + ".zip");
        try {
            final InputStream inputStream = FileUtils.openInputStream(result);
            StreamingOutput stream = new StreamingOutput() {
                public void write(OutputStream os) throws IOException, WebApplicationException {
                    IOUtils.copy(inputStream, os);
                    IOUtils.closeQuietly(inputStream);
                }
            };
            return Response.ok(stream).build();
        } catch (IOException e) {
            e.printStackTrace();
            return Response.serverError().build();
        }
    }
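    /*
     * A hypothetical client-side download of a published bag (the host, id,
     * and output file are illustrative; any HTTP client would work the same
     * way):
     *
     *   URL bag = new URL("http://repo.example.org/researchobjects/"
     *           + URLEncoder.encode("tag:example,2015:ro1", "UTF-8") + "/bag");
     *   try (InputStream in = bag.openStream();
     *           OutputStream out = new FileOutputStream("ro1.zip")) {
     *       IOUtils.copy(in, out);
     *   }
     */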
    // Calculate the path to the zip in the file system based on the base path
    // and the 2-level hash subdirectory scheme
    public static String getDataPathTo(String id) {
        String pathString = DigestUtils.sha1Hex(id);
        String path = Repository.getDataPath();
        // Two-level hash-based distribution of files
        path = Paths.get(path, pathString.substring(0, 2), pathString.substring(2, 4)).toString();
        log.debug("Path:" + path);
        return path;
    }

    // Calculate the bagName by replacing non-word chars with _ (e.g. the ,:/
    // chars in our normal tag ids)
    public static String getBagNameRoot(String id) {
        return BagGenerator.getValidName(id);
    }
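    /*
     * A worked example of the two-level layout (the hash value here is
     * hypothetical): if DigestUtils.sha1Hex(id) returns "ab12cd34..." and the
     * repository data path is "/data", then the zip for the RO lives in
     *
     *   /data/ab/12/<bagNameRoot>.zip
     *
     * which keeps any single directory from accumulating too many entries.
     */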
    // Get the description file or trigger its generation
    private File getDescFile(String id) throws ZipException, IOException {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File descFile = new File(path, bagNameRoot + ".desc.json");
        if (!descFile.exists()) {
            final InputStream roInputStream = new FileInputStream(getOREMapFile(id));
            File indexFile = new File(path, bagNameRoot + ".index.json");
            generateIndex(roInputStream, descFile, indexFile);
            IOUtils.closeQuietly(roInputStream);
            log.debug("Created desc/index files");
        } else {
            log.trace("Desc and Index exist");
        }
        return descFile;
    }

    // Get the index file or trigger its generation
    private File getIndexFile(String id) throws ZipException, IOException {
        String path = getDataPathTo(id);
        String bagNameRoot = getBagNameRoot(id);
        File indexFile = new File(path, bagNameRoot + ".index.json");
        if (!indexFile.exists()) {
            final InputStream roInputStream = new FileInputStream(getOREMapFile(id));
            File descFile = new File(path, bagNameRoot + ".desc.json");
            generateIndex(roInputStream, descFile, indexFile);
            IOUtils.closeQuietly(roInputStream);
        } else {
            log.trace("Desc and Index exist");
        }
        return indexFile;
    }

    private JsonNode getAggregation(String id, File indexFile, CountingInputStream cis, boolean withChildren,
            Long oreFileSize) throws JsonParseException, JsonMappingException, IOException {
        log.debug("Getting Aggregation");
        long curPos = 0;
        // Always need to generate these
        ArrayList<String> entries = new ArrayList<String>();
        ArrayList<Long> offsets = new ArrayList<Long>();
        FileInputStream fis = new FileInputStream(indexFile);
        JsonFactory f = new MappingJsonFactory();
        JsonParser jp = f.createParser(fis);
        JsonToken current;
        log.debug("Reading Index file");
        current = jp.nextToken(); // Start object
        while ((current = jp.nextToken()) != null) {
            if (current.equals(JsonToken.FIELD_NAME)) {
                String fName = jp.getText();
                current = jp.nextToken(); // Get to start of value
                long offset = jp.getLongValue();
                log.trace("Adding: " + fName + " : " + offset);
                entries.add(fName);
                offsets.add(offset);
            }
        }
        IOUtils.closeQuietly(fis);
        File descFile = getDescFile(id);
        InputStream is = new FileInputStream(descFile);
        ObjectNode resultNode = (ObjectNode) mapper.readTree(is);
        IOUtils.closeQuietly(is);
        log.trace(resultNode.toString());
        if ((resultNode.has("Has Part")) && withChildren) {
            resultNode = getChildren(resultNode, indexFile, cis, oreFileSize, curPos, entries, offsets);
        } else {
            resultNode.remove("aggregates");
        }
        log.debug("Aggregation retrieved");
        return resultNode;
    }

    // Get the first item, before the entries and offsets lists are created
    // (they are used to get children efficiently)
    private JsonNode getItem(String item, File indexFile, CountingInputStream cis, boolean withChildren,
            long oreFileSize) throws JsonParseException, JsonMappingException, IOException {
        return getItem(item, indexFile, cis, withChildren, oreFileSize, 0, null, null);
    }

    // Get an item as a child using the existing (if not null) entries and
    // offset lists
    private JsonNode getItem(String item, File indexFile, CountingInputStream cis, boolean withChildren,
            Long oreFileSize, long curOffset, ArrayList<String> entries, ArrayList<Long> offsets)
            throws JsonParseException, JsonMappingException, IOException {
        log.trace("Getting: " + item + " with starting offset: " + curOffset);
        long curPos = curOffset;
        if ((entries == null) || (offsets == null)) {
            entries = new ArrayList<String>();
            offsets = new ArrayList<Long>();
            FileInputStream fis = new FileInputStream(indexFile);
            JsonFactory f = new MappingJsonFactory();
            JsonParser jp = f.createParser(fis);
            JsonToken current;
            log.trace("Reading Index file");
            current = jp.nextToken(); // Start object
            while ((current = jp.nextToken()) != null) {
                if (current.equals(JsonToken.FIELD_NAME)) {
                    String fName = jp.getText();
                    current = jp.nextToken(); // Get to start of value
                    long offset = jp.getLongValue();
                    log.trace("Adding: " + fName + " : " + offset);
                    entries.add(fName);
                    offsets.add(offset);
                }
            }
            try {
                fis.close();
            } catch (Exception e) {
                log.debug(e.getMessage());
            }
        }
        int index = entries.indexOf(item);
        if (index == -1) {
            log.warn(item + " not in index");
            return null;
        }
        // Estimate this item's size from the next entry's offset (or from the
        // end of the oremap file for the last entry)
        int estSize;
        if (index < offsets.size() - 1) {
            estSize = (int) (offsets.get(index + 1) - offsets.get(index));
        } else {
            estSize = (int) (oreFileSize - offsets.get(index));
        }
        curPos += skipTo(cis, curPos, offsets.get(index));
        log.trace("Current Pos updated to : " + curPos);
        byte[] b = new byte[estSize];
        int bytesRead = cis.read(b);
        log.trace("Read " + bytesRead + " bytes");
        if (bytesRead == estSize) {
            log.trace("Read: " + new String(b));
            InputStream is = new ByteArrayInputStream(b);
            // mapper seems to be OK ignoring a last char such as a comma after
            // the object/tree
            ObjectNode resultNode = (ObjectNode) mapper.readTree(is);
            try {
                is.close();
            } catch (Exception e) {
                log.debug(e.getMessage());
            }
            curPos += bytesRead;
            log.trace("curPos: " + curPos + " : count: " + cis.getByteCount());
            log.trace(resultNode.toString());
            if ((resultNode.has("Has Part")) && withChildren) {
                resultNode = getChildren(resultNode, indexFile, cis, oreFileSize, curPos, entries, offsets);
            } else {
                resultNode.remove("aggregates");
            }
            return resultNode;
        } else {
            return null;
        }
    }

    // Get all direct child nodes
    private ObjectNode getChildren(ObjectNode resultNode, File indexFile, CountingInputStream cis, Long oreFileSize,
            long curPos, ArrayList<String> entries, ArrayList<Long> offsets)
            throws JsonParseException, JsonMappingException, IOException {
        ArrayList<String> childIds = new ArrayList<String>();
        JsonNode children = resultNode.get("Has Part");
        if (children.isArray()) {
            for (JsonNode child : children) {
                childIds.add(child.textValue());
            }
        } else {
            System.out.println("Has Part not an array");
            childIds.add(children.textValue());
        }
        ArrayNode aggregates = mapper.createArrayNode();
        for (String name : childIds) {
            aggregates.add(getItem(name, indexFile, cis, false, oreFileSize, curPos, entries, offsets));
            curPos = cis.getByteCount();
            log.trace("curPos updated to " + curPos + " after reading: " + name);
        }
        log.trace("Child Ids: " + childIds.toString());
        resultNode.set("aggregates", aggregates);
        return resultNode;
    }
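    /*
     * For reference, the index file consulted above is a flat JSON object
     * mapping each AggregatedResource @id to the byte offset of its JSON
     * object within the cached oremap file. A hypothetical example (ids and
     * offsets are illustrative):
     *
     *   {
     *     "tag:example,2015:subcollection1" : 2048,
     *     "tag:example,2015:file1" : 4711
     *   }
     *
     * getItem() uses the next entry's offset (or the oremap file size for the
     * last entry) to estimate how many bytes one item's description occupies,
     * so a single child can be read without parsing the whole oremap.
     */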
    // Skip forward as needed through the oremap to find the next child.
    // FixMe - it is not required that AggregatedResources in the oremap are
    // in the same relative order as they are listed in the dcterms:hasPart
    // list. If backwards skips are seen, we need to order the children
    // according to their relative offsets before attempting to retrieve them.
    private static long skipTo(CountingInputStream cis, long curPos, Long long1) throws IOException {
        log.trace("Skipping to : " + long1.longValue());
        long offset = long1.longValue() - curPos;
        if (offset < 0) {
            log.error("Backwards jump attempted");
            throw new IOException("Backward Skip: failed");
        }
        log.trace("At: " + curPos + " going forward by " + offset);
        long curskip = 0;
        while (curskip < offset) {
            long inc = cis.skip(offset - curskip);
            if (inc == -1) {
                log.error("End of Stream");
                throw new IOException("End of Stream");
            }
            curskip += inc;
        }
        return offset;
    }

    // Create the desc and index files by parsing the oremap
    protected static void generateIndex(InputStream ro, File descFile, File indexFile)
            throws JsonParseException, IOException {
        log.debug("Generating desc and index files");
        JsonFactory f = new MappingJsonFactory(); // reading
        JsonParser jp = f.createParser(ro);
        JsonGenerator generator = new JsonFactory().createGenerator(descFile, JsonEncoding.UTF8);
        JsonToken current;
        current = jp.nextToken();
        report(jp, current);
        while ((current = jp.nextToken()) != null) {
            if (current.equals(JsonToken.FIELD_NAME)) {
                String fName = jp.getText();
                if (fName.equals("describes")) {
                    log.trace("describes");
                    while ((current = jp.nextToken()) != null) {
                        if (jp.isExpectedStartObjectToken()) {
                            generator.setCodec(new ObjectMapper());
                            generator.useDefaultPrettyPrinter();
                            generator.writeStartObject();
                            while ((current = jp.nextToken()) != JsonToken.END_OBJECT) {
                                if (current != JsonToken.FIELD_NAME) {
                                    log.warn("Unexpected Token!");
                                    report(jp, current);
                                } else {
                                    report(jp, current);
                                    String name = jp.getText();
                                    current = jp.nextToken(); // Get to start of value
                                    if (!name.equals("aggregates")) {
                                        log.trace("Writing: " + name);
                                        generator.writeFieldName(name);
                                        generator.writeTree(jp.readValueAsTree());
                                    } else {
                                        report(jp, current);
                                        log.trace("Skipping?");
                                        if (current.isStructStart()) {
                                            indexChildren(indexFile, jp);
                                            // jp.skipChildren();
                                        } else {
                                            log.warn("Was Not Struct start!");
                                        }
                                        log.trace("Hit aggregates");
                                    }
                                }
                            }
                            generator.writeEndObject();
                            generator.close();
                        }
                    }
                }
            }
        }
    }

    private static void indexChildren(File index, JsonParser jp) throws IOException {
        JsonGenerator generator = new JsonFactory().createGenerator(index, JsonEncoding.UTF8);
        generator.useDefaultPrettyPrinter();
        generator.writeStartObject();
        JsonToken cur = jp.nextToken();
        while (cur.equals(JsonToken.START_OBJECT)) {
            long start = jp.getTokenLocation().getByteOffset();
            int depth = 1;
            while (depth > 0) {
                cur = jp.nextToken();
                if (cur.equals(JsonToken.START_OBJECT)) {
                    depth++;
                } else if (cur.equals(JsonToken.END_OBJECT)) {
                    depth--;
                } else if (cur.equals(JsonToken.FIELD_NAME) && depth == 1) {
                    if (jp.getText().equals("@id")) {
                        cur = jp.nextToken();
                        String vName = jp.getText();
                        generator.writeNumberField(vName, start);
                    } else {
                        report(jp, cur);
                    }
                }
            }
            cur = jp.nextToken();
        }
        generator.writeEndObject();
        generator.close();
    }
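    /*
     * The companion desc file written by generateIndex is the oremap's
     * "describes" object with its "aggregates" array stripped out, e.g. (a
     * hypothetical, abbreviated example; field names beyond @id and Has Part
     * depend on the RO):
     *
     *   {
     *     "@id" : "tag:example,2015:ro1",
     *     "Title" : "Example RO",
     *     "Has Part" : [ "tag:example,2015:file1" ]
     *   }
     *
     * This lets the landing page show top-level metadata without parsing the
     * (possibly very large) full oremap.
     */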
    // debug output useful in testing parsing
    private static void report(JsonParser jp, JsonToken token) {
        boolean struct = token.isStructStart() || token.isStructEnd();
        try {
            String tag = struct ? token.asString() : jp.getText();
            log.trace("Tag: " + tag);
        } catch (IOException e) {
            e.printStackTrace();
        }
        long currentOffset = jp.getCurrentLocation().getByteOffset();
        long tokenOffset = jp.getTokenLocation().getByteOffset();
        log.trace("Cur: " + currentOffset + " tok: " + tokenOffset);
    }
}