io.fluo.mapreduce.FluoInputFormat.java Source code


Introduction

Here is the source code for io.fluo.mapreduce.FluoInputFormat.java
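
Before the listing, here is a minimal sketch of how a MapReduce driver might wire this input format into a job. It is illustrative only: the class name FluoSnapshotDriver, the mapper SnapshotMapper (sketched after the listing), and all connection values are placeholders, and the FluoConfiguration setters and ConfigurationConverter.getProperties call are assumptions that mirror the getters this class uses inside configure().

import io.fluo.api.config.FluoConfiguration;
import io.fluo.mapreduce.FluoInputFormat;
import org.apache.commons.configuration.ConfigurationConverter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class FluoSnapshotDriver {
    public static void main(String[] args) throws Exception {
        // Placeholder connection settings; substitute the values for your cluster.
        FluoConfiguration fluoConfig = new FluoConfiguration();
        fluoConfig.setAccumuloInstance("instance");
        fluoConfig.setAccumuloUser("root");
        fluoConfig.setAccumuloPassword("secret");
        fluoConfig.setZookeepers("localhost/fluo");

        Job job = Job.getInstance(new Configuration(), "fluo snapshot scan");
        job.setJarByClass(FluoSnapshotDriver.class);
        job.setInputFormatClass(FluoInputFormat.class);

        // configure() serializes the Fluo properties into the job configuration and
        // allocates the snapshot timestamp that every map task will read at.
        FluoInputFormat.configure(job, ConfigurationConverter.getProperties(fluoConfig));

        // Optionally restrict the scan to specific column families.
        FluoInputFormat.fetchFamilies(job, "content");

        // SnapshotMapper is the sketch shown after the listing; this job only maps.
        job.setMapperClass(SnapshotMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(NullOutputFormat.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}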

Source

/*
 * Copyright 2014 Fluo authors (see AUTHORS)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.fluo.mapreduce;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;

import io.fluo.api.config.FluoConfiguration;
import io.fluo.api.config.ScannerConfiguration;
import io.fluo.api.data.Bytes;
import io.fluo.api.data.Span;
import io.fluo.api.iterator.ColumnIterator;
import io.fluo.api.iterator.RowIterator;
import io.fluo.core.impl.Environment;
import io.fluo.core.impl.TransactionImpl;
import io.fluo.core.util.SpanUtil;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapreduce.RangeInputSplit;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.commons.configuration.ConfigurationConverter;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * This input format reads a consistent snapshot from a Fluo table.
 */
public class FluoInputFormat extends InputFormat<Bytes, ColumnIterator> {

    private static final String TIMESTAMP_CONF_KEY = FluoInputFormat.class.getName() + ".timestamp";
    private static final String PROPS_CONF_KEY = FluoInputFormat.class.getName() + ".props";
    private static final String FAMS_CONF_KEY = FluoInputFormat.class.getName() + ".families";

    @Override
    public RecordReader<Bytes, ColumnIterator> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {

        return new RecordReader<Bytes, ColumnIterator>() {

            private Entry<Bytes, ColumnIterator> entry;
            private RowIterator rowIter;
            private Environment env = null;
            private TransactionImpl ti = null;

            @Override
            public void close() throws IOException {
                if (env != null) {
                    env.close();
                }
                if (ti != null) {
                    ti.close();
                }
            }

            @Override
            public Bytes getCurrentKey() throws IOException, InterruptedException {
                return entry.getKey();
            }

            @Override
            public ColumnIterator getCurrentValue() throws IOException, InterruptedException {
                return entry.getValue();
            }

            @Override
            public float getProgress() throws IOException, InterruptedException {
                // TODO progress tracking is not implemented; always report 0
                return 0;
            }

            @Override
            public void initialize(InputSplit split, TaskAttemptContext context)
                    throws IOException, InterruptedException {
                try {
                    // TODO this uses non public Accumulo API!
                    RangeInputSplit ris = (RangeInputSplit) split;

                    Span span = SpanUtil.toSpan(ris.getRange());

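                    // Rebuild the Fluo client properties that configure() stored in the job configuration.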
                    ByteArrayInputStream bais = new ByteArrayInputStream(
                            context.getConfiguration().get(PROPS_CONF_KEY).getBytes("UTF-8"));
                    PropertiesConfiguration props = new PropertiesConfiguration();
                    props.load(bais);

                    env = new Environment(new FluoConfiguration(props));

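                    // Open a transaction at the snapshot timestamp allocated in configure(), then scan only this split's span.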
                    ti = new TransactionImpl(env, context.getConfiguration().getLong(TIMESTAMP_CONF_KEY, -1));
                    ScannerConfiguration sc = new ScannerConfiguration().setSpan(span);

                    for (String fam : context.getConfiguration().getStrings(FAMS_CONF_KEY, new String[0])) {
                        sc.fetchColumnFamily(Bytes.wrap(fam));
                    }

                    rowIter = ti.get(sc);
                } catch (Exception e) {
                    throw new IOException(e);
                }
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                if (rowIter.hasNext()) {
                    entry = rowIter.next();
                    return true;
                }
                return false;
            }
        };

    }

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        return new AccumuloInputFormat().getSplits(context);
    }

    /**
     * Configures the properties needed to connect to a Fluo instance.
     * 
     * @param conf
     *          the MapReduce job to configure
     * @param props
     *          Fluo connection properties; use {@link io.fluo.api.config.FluoConfiguration} to
     *          configure programmatically
     */
    @SuppressWarnings("deprecation")
    public static void configure(Job conf, Properties props) {
        try {
            FluoConfiguration config = new FluoConfiguration(ConfigurationConverter.getConfiguration(props));
            try (Environment env = new Environment(config)) {
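                // Allocate a single start timestamp so every map task reads the same consistent snapshot.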
                long ts = env.getSharedResources().getTimestampTracker().allocateTimestamp();
                conf.getConfiguration().setLong(TIMESTAMP_CONF_KEY, ts);

                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                props.store(baos, "");
                conf.getConfiguration().set(PROPS_CONF_KEY, new String(baos.toByteArray(), "UTF-8"));

                AccumuloInputFormat.setZooKeeperInstance(conf, config.getAccumuloInstance(),
                        config.getZookeepers());
                AccumuloInputFormat.setConnectorInfo(conf, config.getAccumuloUser(),
                        new PasswordToken(config.getAccumuloPassword()));
                AccumuloInputFormat.setInputTableName(conf, env.getTable());
                AccumuloInputFormat.setScanAuthorizations(conf, env.getAuthorizations());
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // TODO support text
    public static void fetchFamilies(Job job, String... fams) {
        job.getConfiguration().setStrings(FAMS_CONF_KEY, fams);
    }

    public static void fetchFamilies(Job job, Bytes... fams) {
        // TODO support binary data
        String[] sfams = new String[fams.length];
        for (int i = 0; i < sfams.length; i++) {
            sfams[i] = fams[i].toString();
        }
        fetchFamilies(job, sfams);
    }

    // TODO let user set auths
    // TODO let user set ranges
}
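
For completeness, here is a sketch of a mapper that could consume the Bytes row / ColumnIterator pairs this input format emits. SnapshotMapper and the counting output are illustrative only; the sketch assumes ColumnIterator iterates Entry&lt;Column, Bytes&gt; pairs, as suggested by the Fluo API types imported above.

import java.io.IOException;
import java.util.Map.Entry;

import io.fluo.api.data.Bytes;
import io.fluo.api.data.Column;
import io.fluo.api.iterator.ColumnIterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SnapshotMapper extends Mapper<Bytes, ColumnIterator, Text, LongWritable> {

    private static final LongWritable ONE = new LongWritable(1);

    @Override
    protected void map(Bytes row, ColumnIterator columns, Context context)
            throws IOException, InterruptedException {
        // Each input record is one row of the snapshot; walk its columns.
        while (columns.hasNext()) {
            Entry<Column, Bytes> col = columns.next();
            // Emit one count per (row, column) pair seen in the snapshot.
            context.write(new Text(row + ":" + col.getKey()), ONE);
        }
    }
}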