Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap.hadoop; import java.io.IOException; import cascading.tap.Tap; import cascading.tap.TapException; import cascading.tuple.Tuple; import cascading.tuple.TupleIterator; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConfigurable; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.log4j.Logger; /** * Class TapIterator is an implementation of {@link TupleIterator}. It is returned by {@link cascading.tap.Tap} instances when * opening the taps resource for reading. */ public class TapIterator implements TupleIterator { /** Field LOG */ private static final Logger LOG = Logger.getLogger(TapIterator.class); /** Field tap */ private final Tap tap; /** Field inputFormat */ private InputFormat inputFormat; /** Field conf */ private final JobConf conf; /** Field splits */ private InputSplit[] splits; /** Field reader */ private RecordReader reader; /** Field key */ private Object key; /** Field value */ private Object value; /** Field currentSplit */ private int currentSplit = 0; /** Field currentTuple */ private Tuple currentTuple; /** Field complete */ private boolean complete = false; /** * Constructor TapIterator creates a new TapIterator instance. * * @param conf of type JobConf * @throws IOException when */ public TapIterator(Tap tap, JobConf conf) throws IOException { this.tap = tap; this.conf = new JobConf(conf); initalize(); } private void initalize() throws IOException { tap.sourceInit(conf); if (!tap.pathExists(conf)) { complete = true; return; } inputFormat = conf.getInputFormat(); if (inputFormat instanceof JobConfigurable) ((JobConfigurable) inputFormat).configure(conf); splits = inputFormat.getSplits(conf, 1); if (splits.length == 0) { complete = true; return; } reader = makeReader(currentSplit); key = reader.createKey(); value = reader.createValue(); if (LOG.isDebugEnabled()) { LOG.debug("found splits: " + splits.length); LOG.debug("using key: " + key.getClass().getName()); LOG.debug("using value: " + value.getClass().getName()); } } private RecordReader makeReader(int currentSplit) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("reading split: " + currentSplit); return inputFormat.getRecordReader(splits[currentSplit], conf, Reporter.NULL); } /** * Method hasNext returns true if there more {@link Tuple} instances available. * * @return boolean */ public boolean hasNext() { getNextTuple(); return !complete; } /** * Method next returns the next {@link Tuple}. * * @return Tuple */ public Tuple next() { try { getNextTuple(); return currentTuple; } finally { currentTuple = null; } } private void getNextTuple() { if (currentTuple != null || reader == null) return; try { if (reader.next(key, value)) { currentTuple = tap.source(key, value); getNextTuple(); // handles case where currentTuple is returned null from the source } else if (currentSplit < splits.length - 1) { reader.close(); reader = makeReader(++currentSplit); getNextTuple(); } else { complete = true; } } catch (IOException exception) { throw new TapException("could not get next tuple", exception); } } public void remove() { throw new UnsupportedOperationException("unimplemented"); } public void close() { try { if (reader != null) reader.close(); } catch (IOException exception) { LOG.warn("exception closing iteraor", exception); } } }