// Java tutorial
/* * Copyright (C) 2013 The Calrissian Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.calrissian.accumulorecipes.commons.hadoop; import static java.util.Arrays.asList; import static org.apache.accumulo.core.util.format.DefaultFormatter.formatEntry; import static org.calrissian.accumulorecipes.commons.iterators.support.EventFields.initializeKryo; import static org.calrissian.accumulorecipes.commons.support.Constants.END_BYTE; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Map; import java.util.Set; import com.esotericsoftware.kryo.Kryo; import com.google.common.base.Function; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.mapreduce.InputFormatBase; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.calrissian.accumulorecipes.commons.domain.Settable; import org.calrissian.accumulorecipes.commons.iterators.BooleanLogicIterator; 
import org.calrissian.accumulorecipes.commons.iterators.OptimizedQueryIterator; import org.calrissian.accumulorecipes.commons.iterators.SelectFieldsExtractor; import org.calrissian.accumulorecipes.commons.iterators.support.NodeToJexl; import org.calrissian.accumulorecipes.commons.support.qfd.planner.QueryPlanner; import org.calrissian.accumulorecipes.commons.support.qfd.planner.visitors.GlobalIndexVisitor; import org.calrissian.mango.criteria.domain.Node; import org.calrissian.mango.domain.AttributeStore; import org.calrissian.mango.types.TypeRegistry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class BaseQfdInputFormat<T extends AttributeStore, W extends Settable> extends InputFormatBase<Key, W> { public static final Logger log = LoggerFactory.getLogger(BaseQfdInputFormat.class); public static final String SELECT_FIELDS = "selectFields"; protected static void configureScanner(Job job, Set<String> types, Node query, NodeToJexl nodeToJexl, GlobalIndexVisitor globalInexVisitor, TypeRegistry<String> typeRegistry, Class<? 
extends OptimizedQueryIterator> optimizedQueryIteratorClass) throws AccumuloSecurityException, AccumuloException, TableNotFoundException { QueryPlanner logicalPlan = new QueryPlanner(query, globalInexVisitor, typeRegistry); String jexl = nodeToJexl.transform(types, logicalPlan.getOptimizedQuery()); String originalJexl = nodeToJexl.transform(types, query); log.debug("Original Jexl: " + originalJexl); log.debug("Optimized Jexl: " + jexl); Collection<Range> ranges = new ArrayList<Range>(); if (jexl.equals("()") || jexl.equals("") || logicalPlan.getShards().size() == 0) { ranges.add(new Range(END_BYTE)); } else { for (String shard : logicalPlan.getShards()) ranges.add(new Range(shard)); IteratorSetting setting = new IteratorSetting(16, optimizedQueryIteratorClass); setting.addOption(BooleanLogicIterator.QUERY_OPTION, originalJexl); setting.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, jexl); addIterator(job, setting); } setRanges(job, ranges); final String[] selectFields = job.getConfiguration().getStrings(SELECT_FIELDS); if (selectFields != null && selectFields.length > 0) { IteratorSetting iteratorSetting = new IteratorSetting(16, SelectFieldsExtractor.class); SelectFieldsExtractor.setSelectFields(iteratorSetting, new HashSet<String>(asList(selectFields))); } } protected abstract Function<Map.Entry<Key, Value>, T> getTransform(Configuration configuration); protected abstract W getWritable(); /** * Sets selection fields on the current configuration. 
*/ public static void setSelectFields(Configuration config, Set<String> selectFields) { if (selectFields != null) config.setStrings(SELECT_FIELDS, selectFields.toArray(new String[] {})); } @Override public RecordReader<Key, W> createRecordReader(InputSplit split, final TaskAttemptContext context) throws IOException, InterruptedException { final W sharedWritable = getWritable(); Kryo kryo = new Kryo(); initializeKryo(kryo); final Function<Map.Entry<Key, Value>, T> xform = getTransform(context.getConfiguration()); return new RecordReaderBase<Key, W>() { @Override public boolean nextKeyValue() throws IOException, InterruptedException { if (scannerIterator.hasNext()) { ++numKeysRead; Map.Entry<Key, Value> entry = scannerIterator.next(); currentK = currentKey = entry.getKey(); sharedWritable.set(xform.apply(entry)); currentV = sharedWritable; if (log.isTraceEnabled()) log.trace("Processing key/value pair: " + formatEntry(entry, true)); return true; } return false; } }; } }