org.vpac.ndg.query.Query.java Source code

Java tutorial

Introduction

Here is the source code for org.vpac.ndg.query.Query.java

Source

/*
 * This file is part of the Raster Storage Archive (RSA).
 *
 * The RSA is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * The RSA is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * the RSA.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright 2013 CRCSI - Cooperative Research Centre for Spatial Information
 * http://www.crcsi.com.au/
 */

package org.vpac.ndg.query;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.vpac.ndg.query.DatasetOutput.VariableBindingDefinition;
import org.vpac.ndg.query.QueryDefinition.DatasetInputDefinition;
import org.vpac.ndg.query.QueryDefinition.GridDefinition;
import org.vpac.ndg.query.coordinates.CoordinateUtils;
import org.vpac.ndg.query.coordinates.GridProjected;
import org.vpac.ndg.query.coordinates.QueryCoordinateSystem;
import org.vpac.ndg.query.coordinates.TimeAxis;
import org.vpac.ndg.query.coordinates.Warp;
import org.vpac.ndg.query.coordinates.WarpFactory;
import org.vpac.ndg.query.filter.Accumulator;
import org.vpac.ndg.query.filter.Foldable;
import org.vpac.ndg.query.iteration.Flatten;
import org.vpac.ndg.query.iteration.ZipN;
import org.vpac.ndg.query.math.BoxInt;
import org.vpac.ndg.query.math.BoxReal;
import org.vpac.ndg.query.math.VectorInt;
import org.vpac.ndg.query.sampling.Binding;
import org.vpac.ndg.query.sampling.PageCache;
import org.vpac.ndg.query.sampling.TileGenerator;
import org.vpac.ndg.query.sampling.TilingStrategy;
import org.vpac.ndg.query.sampling.TilingStrategyStride;

import ucar.nc2.NetcdfFileWriter;

/**
 * Generates new datasets by passing existing data through a filter. In
 * programming terms, a {@link FilterAdapter} is the operation and the Query
 * provides the parameters.
 *
 * @author Alex Fraser
 * @see FilterAdapter
 * @see QueryDefinition
 */
public class Query implements Closeable {

    // One logger per class; static so it is not allocated per Query instance.
    static final Logger log = LoggerFactory.getLogger(Query.class);

    /** URI the query definition was loaded from; used to resolve relative input references. */
    protected String referential;
    protected DatasetStore datasetStore;
    protected CoordinateUtils coordinateUtils;
    protected BindingStore bindings;
    protected DatasetOutput outputDs;
    protected NetcdfFileWriter output;
    /** One filter chain per worker thread; the outer list is indexed by thread. */
    protected List<List<FilterAdapter>> filters;

    /**
     * The arrangement of output pages. For input pages, see {@link PageCache}.
     */
    protected TilingStrategy tilingStrategy;
    protected TileProcessor tileProcessor;
    protected Progress progress;
    // Single canonical default. (Previously initialised to 2 here and then
    // overwritten with DEFAULT_WORKER_THREADS in the constructor, which was
    // contradictory and misleading.)
    protected int numThreads = DEFAULT_WORKER_THREADS;

    static final int TILE_SIZE = 256;
    static final int DEFAULT_WORKER_THREADS = 1;

    /**
     * Creates a null query. Use {@link #setMemento(QueryDefinition, String)} to
     * initialise.
     *
     * @param output The (not yet populated) file that results will be written to.
     */
    public Query(NetcdfFileWriter output) {
        referential = null;
        bindings = new BindingStore();
        this.output = output;
        filters = new ArrayList<List<FilterAdapter>>();
        numThreads = DEFAULT_WORKER_THREADS;
        progress = new ProgressNull();

        //tilingStrategy = new TilingStrategyCube(TILE_SIZE);
        tilingStrategy = new TilingStrategyStride(TILE_SIZE * TILE_SIZE * 3);
    }

    /**
     * Sets the number of worker threads used when {@link #run() running} the
     * query.
     *
     * @param numThreads The number of threads; must be at least 1.
     * @throws QueryConfigurationException If numThreads is less than 1.
     */
    public void setNumThreads(int numThreads) throws QueryConfigurationException {
        if (numThreads < 1) {
            throw new QueryConfigurationException("Number of threads must be one or more.");
        }
        this.numThreads = numThreads;
    }

    /**
     * Constructs this query from a {@link QueryDefinition}.
     *
     * @param qdOrig The query, in serialised form.
     * @param uri The URI the definition was loaded from; relative input
     *        references are resolved against it.
     * @throws IOException If an input dataset can not be read.
     * @throws QueryConfigurationException If the definition is invalid, e.g.
     *         it has no inputs or an unresolvable reference.
     */
    public void setMemento(QueryDefinition qdOrig, String uri)
            throws IOException, SecurityException, QueryConfigurationException {

        log.info("Configuring query based on memento");

        referential = uri;

        datasetStore = new DatasetStore();
        coordinateUtils = new CoordinateUtils(datasetStore);

        // Pre-process query: Sort filters into creation order, expand
        // references to sockets, etc.
        QueryDefinitionProprocessor qdp = new QueryDefinitionProprocessor();
        qdp.setQueryDefinition(qdOrig);
        qdp.gatherFilterInfo();
        // Sort filters first - avoids sorting based on more numerous expanded
        // references.
        qdp.sortFilters();

        // Before socket refs can be expanded, the inputs need to be queried.
        log.info("Initialising inputs");
        initialiseInputs(qdp.getQueryDefinition());
        qdp.guessGrid(datasetStore.inputDatasets);
        qdp.expandReferences(datasetStore);

        // Now that references have been expanded, input bands can be marked as
        // required. These bands will be opened later; others will be ignored
        // if the DatasetProvider supports selective opening.
        log.info("Requesting bands {}", qdp.getInputVariableReferences());
        for (NodeReference nr : qdp.getInputVariableReferences())
            datasetStore.requestInputBand(nr);

        // Determine bounds now, before inputs have been fully determined.
        // This is an optimisation for opening a small section of a very large
        // tiled dataset.
        log.info("Initialising grid");
        GridProjected outputGrid = coordinateUtils.initialiseGrid(qdp.getQueryDefinition().output.grid);
        DateTime temporalBounds[] = determineTemporalBounds(qdp.getQueryDefinition());

        log.info("Opening inputs");
        openInputs(qdp.getQueryDefinition(), outputGrid, temporalBounds);

        // Now that the datasets are open, finish constructing the coordinate
        // system.
        QueryCoordinateSystem csys = constructCoordinateSystem(outputGrid);

        // Create one factory per thread. This ensures each filter is only
        // connected to others that were created for the same thread context.
        // Defensive re-check: numThreads is a protected field and may have
        // been mutated directly by a subclass, bypassing setNumThreads().
        if (numThreads < 1) {
            throw new QueryConfigurationException("Invalid number of threads: must be greater than zero.");
        }
        List<FilterFactory> factories = new ArrayList<FilterFactory>(numThreads);
        for (int i = 0; i < numThreads; i++) {
            factories.add(new FilterFactory(datasetStore, bindings, csys));
        }

        // Construct filters. Doing this now allows the output dataset to
        // inherit metadata.
        log.info("Constructing filters");
        for (FilterFactory factory : factories) {
            filters.add(factory.createFilters(qdp.getQueryDefinition().filters));
        }

        // Construct the output, so the filters can be bound to it. At this
        // point a single filter store is passed in; when binding, all stores
        // are used (below).
        log.info("Creating output file");
        outputDs = new DatasetOutput(qdp.getQueryDefinition().output, output, datasetStore);
        List<VariableBindingDefinition> variableBindingDefs = outputDs.configure(csys, qdp.getQueryDefinition(),
                factories.get(0).getFilterStore());
        datasetStore.addDataset(outputDs);

        log.info("Binding filters to output");
        for (FilterFactory factory : factories) {
            factory.bindFilters(variableBindingDefs);
        }
        for (FilterAdapter f : new Flatten<FilterAdapter>(filters)) {
            f.verifyConfiguration();
        }
    }

    /**
     * Start creating input datasets. They will be queried for their ideal
     * bounds, and placed in the dataset store.
     *
     * @throws QueryConfigurationException If the definition declares no inputs.
     */
    private void initialiseInputs(QueryDefinition qd) throws IOException, QueryConfigurationException {
        if (qd.inputs == null || qd.inputs.size() == 0) {
            throw new QueryConfigurationException("No inputs specified.");
        }
        for (DatasetInputDefinition did : qd.inputs) {
            DatasetInput di = new DatasetInput(did, referential, qd.cache);
            di.peekGrid();
            datasetStore.addDataset(di);
        }
    }

    /**
     * Parses the optional time extents of the output grid.
     *
     * @return A two-element array of {min, max}; either element may be null
     *         if the corresponding bound was not specified.
     */
    private DateTime[] determineTemporalBounds(QueryDefinition qd) {
        DateTime temporalBounds[] = new DateTime[2];
        DateTimeFormatter fmt = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC();
        GridDefinition gd = qd.output.grid;
        if (gd.timeMin != null && !gd.timeMin.isEmpty())
            temporalBounds[0] = fmt.parseDateTime(qd.output.grid.timeMin);
        if (gd.timeMax != null && !gd.timeMax.isEmpty())
            temporalBounds[1] = fmt.parseDateTime(qd.output.grid.timeMax);
        return temporalBounds;
    }

    /**
     * Fully opens the input datasets, restricted to the (reprojected) bounds
     * of the output grid.
     *
     * @param temporalBounds {min, max} time bounds; elements may be null.
     */
    private void openInputs(QueryDefinition qd, GridProjected outputGrid, DateTime[] temporalBounds)
            throws IOException, QueryConfigurationException {

        // STEP 2: Open input datasets fully, using output grid as a hint about
        // what to open. Actual bounds are determined here.

        // TODO: It might be possible to do a bit more of the output dataset
        // and filter configuration before this. That could allow for input
        // bounds to be guessed for non-gridded output datasets, e.g. those with
        // only a time dimension.

        WarpFactory warpFactory = new WarpFactory();
        BoxReal outputBounds = outputGrid.getBounds();
        for (DatasetInput di : datasetStore.getInputDatasets()) {
            // Warp the output bounds to the input coordinate space.
            Warp toLocal = warpFactory.createSpatialWarp(outputGrid.getSrs().getProjection(),
                    di.getGrid().getSrs().getProjection());
            BoxReal localInputBounds = outputBounds.copy();
            toLocal.warp(localInputBounds);

            // Shrink the bounding box so it doesn't extend beyond the natural
            // extents of the input data. This has no effect on the output
            // bounds.
            localInputBounds.intersect(di.getGrid().getBounds());

            di.open(localInputBounds, temporalBounds[0], temporalBounds[1]);
        }
    }

    /**
     * Combines the output grid with a time axis collated from the (already
     * open) input datasets.
     */
    private QueryCoordinateSystem constructCoordinateSystem(GridProjected outputGrid)
            throws QueryConfigurationException {
        TimeAxis timeAxis = coordinateUtils.collateTime(datasetStore.getInputDatasets());
        QueryCoordinateSystem csys = new QueryCoordinateSystem(outputGrid, timeAxis);
        return csys;
    }

    /**
     * Run the configured filters over the input datasets. One step is
     * processed per distinct output shape; progress is reported via the
     * {@link Progress} object set with {@link #setProgress(Progress)}.
     *
     * @throws QueryConfigurationException If a filter is misconfigured.
     * @throws QueryRuntimeException If a filter fails while processing.
     */
    public void run() throws IOException, QueryConfigurationException, QueryRuntimeException {

        progress.setNsteps(bindings.keys().size());

        log.info("Initialising filters");
        for (FilterAdapter f : new Flatten<FilterAdapter>(filters)) {
            f.initialise();
        }

        // Calculate volume (just for progress information)
        long totalPixels = 0;
        for (VectorInt shape : bindings.keys()) {
            totalPixels += shape.volume() * bindings.get(shape).size();
        }
        progress.setTotalQuanta(totalPixels);
        log.info("Total output volume: {} pixels", totalPixels);

        if (numThreads == 1)
            tileProcessor = new TileProcessorSingle();
        else
            tileProcessor = new TileProcessorMultiple(numThreads);

        log.info("Processing");
        int step = 0;
        try {
            for (VectorInt shape : bindings.keys()) {
                progress.setStep(step + 1, String.format("Processing variables with shape %s", shape));

                process(shape, bindings.get(shape));
                step++;
            }
        } finally {
            // Always release worker threads, even if a filter throws.
            tileProcessor.shutDown();
        }

        log.info("Finished");
        progress.setStep(step, "Finished running filters");
        progress.finished();
    }

    /**
     * Processes all variables that share a single image shape, one output
     * tile at a time.
     *
     * @param imageShape The shape of the output image.
     * @param localBindings The variable bindings that have that shape.
     */
    protected void process(VectorInt imageShape, Collection<Binding> localBindings)
            throws QueryConfigurationException, IOException {

        // Rasterise output into a set of tiles.
        VectorInt tileShape = tilingStrategy.getTileShape(imageShape);
        VectorInt tileGridShape = tilingStrategy.getGridShape(imageShape);

        log.info("Image shape is {}", imageShape);
        log.debug("Tile grid shape is {}", tileGridShape);

        tileProcessor.setBindings(localBindings);

        // Iterate over the output tiles. Note that the input is tiled too, but
        // that is taken care of in PageCache.
        for (VectorInt tile : new TileGenerator(tileGridShape)) {
            VectorInt offset = tile.mulNew(tileShape);
            // Clamp the tile to the image extents (edge tiles may be partial).
            VectorInt end = offset.addNew(tileShape).min(imageShape);
            BoxInt bounds = new BoxInt(offset, end);

            log.trace("Processing tile {} with bounds {}", tile, bounds);

            // 1. Create arrays.
            for (Binding b : localBindings)
                b.setBounds(bounds);

            // 2. Run filters.
            tileProcessor.setBounds(bounds);
            tileProcessor.processTile();

            // 3. Write data.
            for (Binding b : localBindings)
                b.commit(output);

            long npixels = bounds.getSize().volume() * localBindings.size();
            progress.addProcessedQuanta(npixels);
        }

        progress.finishedStep();
    }

    /**
     * Collects the output that was accumulated while running this query.
     *
     * Most filters are designed to process pixels in an image and transform
     * their values, with the new value being written to an output image. Some
     * filters may also collect aggregate information about the data as it is
     * being processed; e.g. a filter may sum the values of all the pixels it
     * processes. This method gives access to that data.
     *
     * @return The output that has been accumulated while running this query.
     *         This is a map from filter ID to accumulated output.
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public Map<String, Foldable<?>> getAccumulatedOutput() {
        Map<String, Foldable<?>> values = new HashMap<String, Foldable<?>>();

        // In a multithreaded query, accumulated output may be split across
        // several filter instances. So we zip all instances of the same filter
        // together, and then fold (reduce) the output.
        for (Iterable<FilterAdapter> fs : new ZipN<FilterAdapter>(filters)) {
            Foldable value = null;
            String id = null;
            for (FilterAdapter fa : fs) {
                if (!(fa.getInnerFilter() instanceof Accumulator<?>))
                    continue;
                Accumulator ac = (Accumulator) fa.getInnerFilter();

                Foldable currentValue = ac.getAccumulatedOutput();
                if (value == null) {
                    value = currentValue;
                    id = fa.getName();
                } else {
                    value = value.fold(value.getClass().cast(currentValue));
                }
                log.debug("Partial output of '{}' is {}", id, currentValue);
            }
            if (value != null) {
                log.info("Accumulated output of '{}' is {}", id, value);
                values.put(id, value);
            }
        }

        return values;
    }

    /**
     * Releases all resources (except things that the garbage collector handles).
     */
    @Override
    public void close() throws IOException {
        // datasetStore is only created in setMemento(); guard against a Query
        // that was constructed but never configured.
        if (datasetStore != null)
            datasetStore.closeAll();
        for (FilterAdapter f : new Flatten<FilterAdapter>(filters))
            f.diagnostics();
    }

    public Progress getProgress() {
        return progress;
    }

    public void setProgress(Progress progress) {
        this.progress = progress;
    }

}