org.calrissian.accumulorecipes.commons.iterators.FirstNEntriesInRowIterator.java Source code

Java tutorial

Introduction

Here is the source code for org.calrissian.accumulorecipes.commons.iterators.FirstNEntriesInRowIterator.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.calrissian.accumulorecipes.commons.iterators;

import static org.calrissian.accumulorecipes.commons.util.RowEncoderUtil.encodeRow;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.OptionDescriber;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.hadoop.io.Text;

/**
 * This {@link org.apache.accumulo.core.iterators.SortedKeyValueIterator} will return only the first n
 * keys/values for each row that it iterates through.
 *
 * <p>Every row is surfaced as a single entry: the top key is built from the row and the column family of the
 * first entry collected for that row, and the top value is the collected entries passed through
 * {@code RowEncoderUtil.encodeRow}.
 *
 * <p>Options:
 * <ul>
 * <li>{@code n} - number of entries to keep per row (default 25, must be at least 1)</li>
 * <li>{@code scansBeforeSeek} - number of {@code next()} calls to try on the source before seeking to the
 * following row (default 10)</li>
 * </ul>
 */
public class FirstNEntriesInRowIterator implements OptionDescriber, SortedKeyValueIterator<Key, Value> {

    // options
    static final String NUM_SCANS_STRING_NAME = "scansBeforeSeek";
    static final String NUM_KEYS_STRING_NAME = "n";

    private static final int DEFAULT_NUM_SCANS = 10;
    private static final int DEFAULT_NUM_KEYS = 25;

    private SortedKeyValueIterator<Key, Value> source = null;

    // iterator predecessor seek options to pass through
    private Range latestRange;
    private Collection<ByteSequence> latestColumnFamilies;
    private boolean latestInclusive;

    // configured options; start at the documented defaults so an instance is never left with n == 0
    private int numscans = DEFAULT_NUM_SCANS;
    private int n = DEFAULT_NUM_KEYS;

    // iteration state
    private Text lastRowFound;
    private Key topKey;
    private Value topValue;
    private final Collection<Map.Entry<Key, Value>> keysValues = Lists.newArrayList();
    private boolean hasSeeked = false;
    // true until the first seek, and again once the next row would lie beyond the seek range
    private boolean finished = true;

    /**
     * Stores the "scans before seek" option on the given iterator setting.
     *
     * @param cfg the iterator setting to configure
     * @param num number of source {@code next()} calls to attempt before seeking to the following row
     */
    public static void setNumScansBeforeSeek(IteratorSetting cfg, int num) {
        cfg.addOption(NUM_SCANS_STRING_NAME, Integer.toString(num));
    }

    /**
     * Stores the "number of keys per row" option on the given iterator setting.
     *
     * @param cfg the iterator setting to configure
     * @param n number of entries to keep per row
     */
    public static void setNumKeysToReturn(IteratorSetting cfg, int n) {
        cfg.addOption(NUM_KEYS_STRING_NAME, Integer.toString(n));
    }

    /**
     * No-argument constructor; this must be public for OptionsDescriber.
     */
    public FirstNEntriesInRowIterator() {
    }

    /**
     * Copy constructor used by {@link #deepCopy(IteratorEnvironment)}.
     *
     * @param other the iterator to copy; its source must already be set
     * @param env the environment handed to the source's {@code deepCopy}
     */
    public FirstNEntriesInRowIterator(FirstNEntriesInRowIterator other, IteratorEnvironment env) {
        setSource(other.getSource().deepCopy(env));
        // Carry the configured options over. Without this the copy would run with n == 0, collect
        // nothing for a row and fail when building the top key.
        this.n = other.n;
        this.numscans = other.numscans;
    }

    /**
     * Replaces the source iterator this iterator reads from.
     *
     * @param source the new source
     */
    protected void setSource(SortedKeyValueIterator<Key, Value> source) {
        this.source = source;
    }

    /**
     * Returns the source iterator.
     *
     * @return the source, never {@code null}
     * @throws IllegalStateException if no source has been set yet
     */
    protected SortedKeyValueIterator<Key, Value> getSource() {
        if (source == null) {
            throw new IllegalStateException("getting null source");
        }
        return source;
    }

    @Override
    public SortedKeyValueIterator<Key, Value> deepCopy(IteratorEnvironment env) {
        return new FirstNEntriesInRowIterator(this, env);
    }

    /**
     * Reads the options (falling back to the defaults 25 keys / 10 scans) and stores a deep copy of the
     * given source.
     *
     * @throws NumberFormatException if an option value is not an integer
     * @throws IllegalArgumentException if {@code n} is smaller than 1
     */
    @Override
    public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options,
            IteratorEnvironment env) throws IOException {
        String scansStr = options.get(NUM_SCANS_STRING_NAME);
        String nStr = options.get(NUM_KEYS_STRING_NAME);

        int keys = nStr == null ? DEFAULT_NUM_KEYS : Integer.parseInt(nStr);
        if (keys < 1) {
            // with n < 1 no entry would ever be collected for a row, so there is no top key to build
            throw new IllegalArgumentException(NUM_KEYS_STRING_NAME + " must be at least 1 but was " + keys);
        }
        n = keys;
        numscans = scansStr == null ? DEFAULT_NUM_SCANS : Integer.parseInt(scansStr);

        setSource(source.deepCopy(env));
    }

    /**
     * Collects up to n entries of the row in {@code lastRowFound} from the source and builds the top
     * key/value from them. Clears the top key/value when there is no current row or iteration is finished.
     */
    private void prepKeys() throws IOException {
        if (finished || lastRowFound == null) {
            // nothing left to return: do not consume the source any further and drop any stale entry
            topKey = null;
            topValue = null;
            return;
        }

        keysValues.clear();

        SortedKeyValueIterator<Key, Value> src = getSource();
        while (src.hasTop() && keysValues.size() < n && src.getTopKey().getRow().equals(lastRowFound)) {
            // copy key and value, the source may reuse its objects once it advances
            keysValues.add(Maps.immutableEntry(new Key(src.getTopKey()), new Value(src.getTopValue())));
            src.next();
        }

        // lastRowFound is always taken from the source's current top key and n >= 1,
        // so at least one entry has been collected at this point
        Key firstKey = keysValues.iterator().next().getKey();
        topKey = new Key(lastRowFound, firstKey.getColumnFamily());
        topValue = encodeRow(keysValues);
    }

    @Override
    public boolean hasTop() {
        return !finished && (topKey != null || getSource().hasTop());
    }

    @Override
    public void next() throws IOException {
        skipRow();
        prepKeys();
    }

    /**
     * Moves the source past the remaining entries of the current row and records the row it lands on in
     * {@code lastRowFound} ({@code null} when the source is exhausted). Tries up to {@code numscans}
     * calls to {@code next()} before seeking to the following row; marks the iterator as finished when
     * the following row would lie beyond the range of the latest seek.
     */
    // this is only ever called immediately after getting "next" entry
    protected void skipRow() throws IOException {
        if (finished || lastRowFound == null) {
            return;
        }

        SortedKeyValueIterator<Key, Value> src = getSource();
        int count = 0;
        while (src.hasTop() && lastRowFound.equals(src.getTopKey().getRow())) {

            // try to efficiently jump to the next matching key
            if (count < numscans) {
                ++count;
                src.next(); // scan
                continue;
            }

            // too many scans, just seek
            count = 0;

            // determine where to seek to, but don't go beyond the user-specified range
            Key nextKey = src.getTopKey().followingKey(PartialKey.ROW);
            if (latestRange.afterEndKey(nextKey)) {
                finished = true;
                break;
            }
            src.seek(new Range(nextKey, true, latestRange.getEndKey(), latestRange.isEndKeyInclusive()),
                    latestColumnFamilies, latestInclusive);
        }

        lastRowFound = src.hasTop() ? src.getTopKey().getRow(lastRowFound) : null;
    }

    /**
     * Seeks the source to the beginning of the row containing the range's start key (so that whole rows
     * are considered) and prepares the first row's entry.
     */
    @Override
    public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
        // save parameters for future internal seeks
        latestRange = range;
        latestColumnFamilies = columnFamilies;
        latestInclusive = inclusive;
        lastRowFound = null;

        Key startKey = range.getStartKey();
        Key rowStart = startKey == null ? null : new Key(startKey.getRow());
        Range seekRange = new Range(rowStart, true, range.getEndKey(), range.isEndKeyInclusive());

        SortedKeyValueIterator<Key, Value> src = getSource();
        src.seek(seekRange, columnFamilies, inclusive);
        finished = false;

        if (src.hasTop()) {
            lastRowFound = src.getTopKey().getRow();
            // on a re-seek, a top key before the requested start belongs to a row that is skipped
            if (hasSeeked && range.beforeStartKey(src.getTopKey())) {
                skipRow();
            }
        }

        hasSeeked = true;

        prepKeys();
    }

    /**
     * @return the key built for the current row, or {@code null} when no row is prepared
     */
    @Override
    public Key getTopKey() {
        return topKey;
    }

    /**
     * @return the encoded entries of the current row, or {@code null} when no row is prepared
     */
    @Override
    public Value getTopValue() {
        return topValue;
    }

    @Override
    public IteratorOptions describeOptions() {
        String name = "firstNEntriesInRowIterator";
        String desc = "Only allows iteration over the first n entries per row";
        HashMap<String, String> namedOptions = new HashMap<String, String>();
        namedOptions.put(NUM_SCANS_STRING_NAME, "Number of scans to try before seeking [10]");
        namedOptions.put(NUM_KEYS_STRING_NAME, "Number of entries to keep per row [25]");
        return new IteratorOptions(name, desc, namedOptions, null);
    }

    /**
     * Checks that both options, when present, are integers and that {@code n} is at least 1.
     *
     * @return {@code true} when the options are valid
     * @throws IllegalArgumentException naming the offending option and its value otherwise
     */
    @Override
    public boolean validateOptions(Map<String, String> options) {
        parseIntOption(options, NUM_SCANS_STRING_NAME);

        Integer keys = parseIntOption(options, NUM_KEYS_STRING_NAME);
        if (keys != null && keys.intValue() < 1) {
            throw new IllegalArgumentException(NUM_KEYS_STRING_NAME + " must be at least 1 but was " + keys);
        }
        return true;
    }

    /**
     * Parses the named option as an integer.
     *
     * @return the parsed value, or {@code null} when the option is not set
     * @throws IllegalArgumentException if the option is set but is not an integer
     */
    private static Integer parseIntOption(Map<String, String> options, String optionName) {
        String raw = options.get(optionName);
        if (raw == null) {
            return null;
        }
        try {
            return Integer.valueOf(raw);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("bad integer " + optionName + ":" + raw, e);
        }
    }

}