org.mrgeo.data.accumulo.output.image.AccumuloMrsImagePyramidOutputFormatProvider.java Source code

Introduction

Here is the source code for org.mrgeo.data.accumulo.output.image.AccumuloMrsImagePyramidOutputFormatProvider.java
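Before the listing, here is a minimal sketch of how an ingest job might drive this provider. It is illustrative only: the ProviderUsageSketch class and runIngest method are hypothetical names, and constructing the AccumuloMrsImageDataProvider, TiledOutputFormatContext, and Hadoop Job is assumed to happen elsewhere in MrGeo.

import java.io.IOException;

import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.hadoop.mapreduce.Job;
import org.mrgeo.data.DataProviderException;
import org.mrgeo.data.accumulo.image.AccumuloMrsImageDataProvider;
import org.mrgeo.data.accumulo.output.image.AccumuloMrsImagePyramidOutputFormatProvider;
import org.mrgeo.data.tile.TiledOutputFormatContext;

public class ProviderUsageSketch {
    // Hypothetical driver: the provider configures the job (direct writes or bulk
    // ingest, chosen by tile count), and teardown() completes a bulk load.
    public static void runIngest(AccumuloMrsImageDataProvider dataProvider,
            TiledOutputFormatContext context, Job job)
            throws DataProviderException, IOException, InterruptedException, ClassNotFoundException {

        AccumuloMrsImagePyramidOutputFormatProvider ofp =
                new AccumuloMrsImagePyramidOutputFormatProvider(dataProvider, context,
                        new ColumnVisibility()); // empty visibility: unrestricted reads

        ofp.setupJob(job); // picks the output format, reducers, splits, and credentials

        if (job.waitForCompletion(true)) {
            ofp.teardown(job); // for bulk jobs, imports the generated RFiles
        }
    }
}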

Source

/*
 * Copyright 2009-2015 DigitalGlobe, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */

package org.mrgeo.data.accumulo.output.image;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.security.ColumnVisibility;
import org.apache.accumulo.core.util.Pair;
import org.apache.accumulo.core.util.TextUtil;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.mrgeo.core.MrGeoConstants;
import org.mrgeo.data.DataProviderException;
import org.mrgeo.data.accumulo.image.AccumuloMrsImageDataProvider;
import org.mrgeo.data.accumulo.partitioners.AccumuloMrGeoRangePartitioner;
import org.mrgeo.data.accumulo.utils.AccumuloConnector;
import org.mrgeo.data.accumulo.utils.AccumuloUtils;
import org.mrgeo.data.accumulo.utils.MrGeoAccumuloConstants;
import org.mrgeo.data.image.MrsImageDataProvider;
import org.mrgeo.data.image.MrsImageOutputFormatProvider;
import org.mrgeo.data.image.MrsImagePyramidMetadataWriter;
import org.mrgeo.data.raster.RasterWritable;
import org.mrgeo.data.tile.TileIdWritable;
import org.mrgeo.data.tile.TiledOutputFormatContext;
import org.mrgeo.utils.Bounds;
import org.mrgeo.utils.LongRectangle;
import org.mrgeo.utils.TMSUtils;
import org.mrgeo.utils.TMSUtils.TileBounds;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.ByteBuffer;
import java.util.*;

public class AccumuloMrsImagePyramidOutputFormatProvider extends MrsImageOutputFormatProvider {
    final MrsImageDataProvider provider;
    private static final Logger log = LoggerFactory.getLogger(AccumuloMrsImagePyramidOutputFormatProvider.class);

    private int zoomLevel = -1;
    private int tileSize = -1;
    private Bounds bounds = null;

    private TileBounds tileBounds = null;
    private String table = null;

    private long bulkThreshold = Long.MAX_VALUE;
    private long tileCount = -1;

    private boolean doBulk = false;
    // bulk output is chosen when the tile count crosses the threshold or when
    // MRGEO_ACC_KEY_FORCE_BULK is set; both flags must start out false, since
    // setupJob() keys off doBulk and getOutputFormat() off doBulk || forceBulk
    private boolean forceBulk = false;

    private Properties props;

    private ColumnVisibility cv = null;

    /*
     *  it is assumed that output for bulk ingest will be of the form
     *  tld_workdir/classname/outputtable/timestamp/
     */
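    // for example (hypothetical path): <workdir>/AccumuloMrsImagePyramidFileOutputFormat/mytable/
    // (the timestamp component is currently not appended in the code below)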
    private String workDir = null;

    public AccumuloMrsImagePyramidOutputFormatProvider(final AccumuloMrsImageDataProvider provider,
            final TiledOutputFormatContext context, final ColumnVisibility cv) {

        super(context);

        this.provider = provider;
        this.cv = cv;

        // TODO: callers should always supply a visibility so this fallback can go away
        if (this.cv == null) {
            this.cv = new ColumnVisibility();
        }
        log.info("column visibility of: " + this.cv.toString());

        this.zoomLevel = context.getZoomlevel();
        this.tileSize = context.getTilesize();

        // get the tile bounds
        this.bounds = context.getBounds();
        this.tileBounds = TMSUtils.boundsToTile(TMSUtils.Bounds.asTMSBounds(this.bounds), this.zoomLevel,
                this.tileSize);

        this.table = context.getOutput();
        if (table.startsWith(MrGeoAccumuloConstants.MRGEO_ACC_PREFIX)) {
            table = table.replace(MrGeoAccumuloConstants.MRGEO_ACC_PREFIX, "");
        }
        log.info("Accumulo working with output table of: " + this.table);

        // figure out the size of output
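        // e.g. a tile rectangle spanning columns 10..13 and rows 20..22 yields 4 * 3 = 12 tiles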
        tileCount = (this.tileBounds.e - this.tileBounds.w + 1) * (this.tileBounds.n - this.tileBounds.s + 1);

        props = AccumuloConnector.getAccumuloProperties();

        if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_BULK_THRESHOLD)) {
            bulkThreshold = Long.parseLong(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_BULK_THRESHOLD));
        }
        if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_FORCE_BULK)) {
            if (Boolean.parseBoolean(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_FORCE_BULK))) {
                log.info("Forcing bulk ingest!");
                forceBulk = true;
                doBulk = true;
            }
        }

        log.info("working with output tile count of " + tileCount + " ("
                + (this.tileBounds.e - this.tileBounds.w + 1) + "x" + (this.tileBounds.n - this.tileBounds.s + 1)
                + ") threshold at " + bulkThreshold);

        if (tileCount > bulkThreshold) {
            // doing bulk ingest
            log.info("Doing Bulk ingest");
            doBulk = true;
        }

    } // end constructor

    @Override
    public OutputFormat getOutputFormat() {
        if (doBulk || forceBulk) {
            log.info("file output format being used at zoom level = " + zoomLevel);
            return new AccumuloMrsImagePyramidFileOutputFormat(zoomLevel, cv);
        } else {
            log.info("accumulo going direct for output at zoom level = " + zoomLevel);
            return new AccumuloMrsImagePyramidOutputFormat(zoomLevel, cv);
        }
    } // end getOutputFormat

    public boolean bulkJob() {
        return doBulk;
    }

    @Override
    public MrsImagePyramidMetadataWriter getMetadataWriter() {
        return provider.getMetadataWriter();
    }

    @Override
    public MrsImageDataProvider getImageProvider() {
        return provider;
    }

    @Override
    public void setupJob(final Job job) throws DataProviderException {
        try {
            //TODO: there is an assumption here that the output is going to accumulo directly - not bulk
            super.setupJob(job);

            job.getConfiguration().addResource(AccumuloConnector.getAccumuloPropertiesLocation());

            // zoom level - output zoom level
            zoomLevel = context.getZoomlevel();
            if (zoomLevel != 0) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL,
                        Integer.toString(zoomLevel));
            }

            if (doBulk) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                        MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
                job.getConfiguration().set(
                        MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                        MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
            } else {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                        MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);
                job.getConfiguration().set(
                        MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                        MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);

            }
            // the Accumulo properties were already loaded in the constructor
            if (props != null) {

                // this used to be the variable "name" in TiledOutputFormatContext, but was always "".
                String enc = AccumuloConnector.encodeAccumuloProperties("");
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_RESOURCE, enc);

                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE));
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS));

                if (props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE) == null) {
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, this.table);
                } else {
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE,
                            props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE));
                }

                // username and password
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER));

                // make sure the password is set with Base64Encoding
                String pw = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD);
                String isEnc = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false");

                if (isEnc.equalsIgnoreCase("true")) {
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, pw);
                } else {
                    byte[] p = Base64.encodeBase64(pw.getBytes());

                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, new String(p));
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "true");
                }

                if (job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL) != null) {
                    cv = new ColumnVisibility(job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL));
                }
                if (cv == null) {
                    if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)) {
                        job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                                props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
                        cv = new ColumnVisibility(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
                    }
                } else {
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                            new String(cv.getExpression()));
                }

            }

            if (doBulk) {
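                // bulk path: pre-split the tile-id space and have reducers write RFiles
                // into a working directory; teardown() later imports those files. The
                // direct path instead writes mutations straight into the table.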

                LongRectangle outTileBounds = tileBounds.toLongRectangle();

                // setup the output for the job
                if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR)) {
                    workDir = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR);
                    if (workDir != null) {
                        workDir += File.separator;
                    }
                } else {
                    workDir = "";
                }
                workDir += AccumuloMrsImagePyramidFileOutputFormat.class.getSimpleName() + File.separator
                        + this.table + File.separator;

                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR, workDir);

                // determine the starting points for the splits
                ArrayList<Pair<Long, Long>> splitPoints = new ArrayList<Pair<Long, Long>>();

                // TODO: think about multiple levels and creating other splits

                long step = bulkThreshold / outTileBounds.getWidth();
                long rem = bulkThreshold % outTileBounds.getWidth();
                if (rem > 0) {
                    step++;
                }
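                // step is ceil(bulkThreshold / width) tile rows per split range, so each
                // reducer receives roughly bulkThreshold tiles; e.g. a threshold of
                // 1,000,000 over rows of 300,000 tiles gives step = 3 + 1 = 4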
                for (long y = outTileBounds.getMinY(); y <= outTileBounds.getMaxY(); y += step) {
                    Pair<Long, Long> cur = new Pair<Long, Long>(outTileBounds.getMinX(), y);
                    splitPoints.add(cur);
                }

                // we now have our list of split points; write the splits file
                FileSystem fs = null;
                PrintStream out = null;

                try {
                    Path wd = new Path(workDir);
                    fs = FileSystem.get(job.getConfiguration());
                    if (fs.exists(wd)) {
                        fs.delete(wd, true);
                    }

                    out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "splits.txt"))));

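                    // each line of splits.txt is one Base64-encoded binary row key (a
                    // big-endian tile id), the format the range partitioner reads back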
                    for (Pair<Long, Long> p : splitPoints) {
                        long split = TMSUtils.tileid(p.getFirst(), p.getSecond(), zoomLevel);
                        Text t = new Text(longToBytes(split));
                        out.println(new String(Base64.encodeBase64(TextUtil.getBytes(t))));
                        log.debug("Point: " + p.getFirst() + "\t" + p.getSecond() + "\t" + split + "\t"
                                + t.getLength());
                    }

                    // one reducer per split range, plus one for keys beyond the last split
                    job.setNumReduceTasks(splitPoints.size() + 1);
                    out.close();

                    job.setPartitionerClass(AccumuloMrGeoRangePartitioner.class);
                    AccumuloMrGeoRangePartitioner.setSplitFile(job, workDir + "splits.txt");

                } catch (IOException ioe) {
                    throw new DataProviderException(
                            "Problem creating output splits.txt for bulk ingest directory.", ioe);
                }

                job.setOutputFormatClass(AccumuloMrsImagePyramidFileOutputFormat.class);

                AccumuloMrsImagePyramidFileOutputFormat.setOutputPath(job, new Path(workDir + "files"));

            } else {

                log.info("Setting the output format of: "
                        + AccumuloMrsImagePyramidOutputFormat.class.getCanonicalName());

                job.setOutputFormatClass(AccumuloMrsImagePyramidOutputFormat.class);
                AccumuloMrsImagePyramidOutputFormat.setJob(job);

                log.info("Setting zoom level to " + zoomLevel);
                log.info("Visibility is " + cv.toString());
                log.info("Setting the number of reducers to " + MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
                job.setNumReduceTasks(MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
            }

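            // both paths emit (tile id, raster) pairs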
            job.setOutputKeyClass(TileIdWritable.class);
            job.setOutputValueClass(RasterWritable.class);

        } catch (IOException ioe) {
            throw new DataProviderException("Error running job setup", ioe);
        }

    } // end setupJob

    @Override
    public void teardown(Job job) throws DataProviderException {
        String myJobType = job.getConfiguration()
                .get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel));

        // compare constant-first so a missing job-type entry cannot throw an NPE
        if (MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK.equals(myJobType)) {
            // do bulk ingest now
            Connector conn = AccumuloConnector.getConnector();
            FileSystem fs = null;

            if (workDir == null) {
                workDir = job.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR);
            }

            try {
                log.info("Bulk ingest starting from working directory of " + workDir);
                fs = FileSystem.get(job.getConfiguration());

                Path working = new Path(workDir + File.separator,
                        MrGeoAccumuloConstants.MRGEO_ACC_FILE_NAME_BULK_WORKING);
                Path completed = new Path(workDir + File.separator,
                        MrGeoAccumuloConstants.MRGEO_ACC_FILE_NAME_BULK_DONE);
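                // the working/completed marker files make teardown idempotent: bail out
                // if a bulk import is already in progress or has already finished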
                if (fs.exists(working) || fs.exists(completed)) {
                    log.info("Bulk ingest completed already.");
                    return;
                } else {
                    FSDataOutputStream fout = fs.create(working);
                    fout.write(("zoom level = " + Integer.toString(zoomLevel) + "\n").getBytes());
                    fout.close();
                }

                // at this point there should be something to bulk ingest

                log.info("Setting work indication file.");

                // make sure there is an empty failures directory for the importer
                Path failures = new Path(workDir, "failures");
                fs.delete(failures, true);
                fs.mkdirs(failures);

                if (!conn.tableOperations().exists(table)) {
                    conn.tableOperations().create(table, true);
                    // one locality group per zoom level, keyed by the level number
                    HashMap<String, Set<Text>> groups = new HashMap<String, Set<Text>>();
                    for (int i = 1; i <= 18; i++) {
                        String k = Integer.toString(i);
                        HashSet<Text> hs = new HashSet<Text>();
                        hs.add(new Text(k));
                        groups.put(k, hs);
                    }
                    conn.tableOperations().setLocalityGroups(table, groups);
                }

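                // hand the generated RFiles to the tablet servers; files that fail to
                // import are moved into the failures directory, and the final flag
                // (setTime) stamps entries with import-time timestamps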
                conn.tableOperations().importDirectory(table, workDir + "files", workDir + "failures", true);
                // compact the entire table (null start/end rows): the row keys are
                // binary tile ids, so a string range would not cover them; flush
                // first and return without waiting for the compaction to finish
                conn.tableOperations().compact(table, null, null, true, false);

                FSDataOutputStream fout = fs.create(completed);
                fout.write(("zoom level = " + Integer.toString(zoomLevel) + "\n").getBytes());
                fout.close();
                fs.delete(working, true);

            } catch (Exception e) {
                throw new DataProviderException("Problem doing bulk ingest.", e);
            }

        }

    } // end teardown

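    // ByteBuffer writes big-endian by default, so these binary row keys sort
    // lexicographically in the same order as the (non-negative) tile ids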
    public byte[] longToBytes(long x) {
        ByteBuffer buffer = ByteBuffer.allocate(8);
        buffer.putLong(x);
        return buffer.array();
    }

    @Override
    public boolean validateProtectionLevel(String protectionLevel) {
        return AccumuloUtils.validateProtectionLevel(protectionLevel);
    }

} // end AccumuloMrsImagePyramidOutputFormatProvider