Java tutorial
/* * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cascading.scheme.hadoop; import java.beans.ConstructorProperties; import java.io.IOException; import cascading.flow.FlowProcess; import cascading.scheme.SinkCall; import cascading.scheme.SourceCall; import cascading.tap.Tap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.SequenceFileOutputFormat; /** * Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes values of the given * {@code writableType} {@code Class}, instead of {@link Tuple} instances used by default in SequenceFile. * <p/> * This Class is a convenience for those who need to read/write specific types from existing sequence files without * them being wrapped in a Tuple instance. * <p/> * Note due to the nature of sequence files, only one type can be stored in the key and value positions, they they can be * uniquely different types (LongWritable, Text). * <p/> * If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value. * <p/> * {@link NullWritable} is used as the empty type for either a null keyType or valueType. */ public class WritableSequenceFile extends SequenceFile { protected final Class<? extends Writable> keyType; protected final Class<? extends Writable> valueType; /** * Constructor WritableSequenceFile creates a new WritableSequenceFile instance. * * @param fields of type Fields * @param valueType of type Class<? extends Writable>, may not be null */ @ConstructorProperties({ "fields", "valueType" }) public WritableSequenceFile(Fields fields, Class<? extends Writable> valueType) { this(fields, null, valueType); } /** * Constructor WritableSequenceFile creates a new WritableSequenceFile instance. * * @param fields of type Fields * @param keyType of type Class<? extends Writable> * @param valueType of type Class<? extends Writable> */ @ConstructorProperties({ "fields", "keyType", "valueType" }) public WritableSequenceFile(Fields fields, Class<? extends Writable> keyType, Class<? extends Writable> valueType) { super(fields); this.keyType = keyType; this.valueType = valueType; if (keyType == null && valueType == null) throw new IllegalArgumentException("both keyType and valueType may not be null"); if (keyType == null && fields.size() != 1) throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'keys' from a sequence file"); else if (valueType == null && fields.size() != 1) throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'values' from a sequence file"); else if (keyType != null && valueType != null && fields.size() != 2) throw new IllegalArgumentException( "fields must declare exactly two fields when only reading/writing 'keys' and 'values' from a sequence file"); } @Override public void sinkConfInit(FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf) { if (keyType != null) conf.setClass("mapred.output.key.class", keyType, Object.class); else conf.setClass("mapred.output.key.class", NullWritable.class, Object.class); if (valueType != null) conf.setClass("mapred.output.value.class", valueType, Object.class); else conf.setClass("mapred.output.value.class", NullWritable.class, Object.class); conf.setClass("mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class); } @Override public boolean source(FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException { Object key = sourceCall.getContext()[0]; Object value = sourceCall.getContext()[1]; boolean result = sourceCall.getInput().next(key, value); if (!result) return false; int count = 0; TupleEntry entry = sourceCall.getIncomingEntry(); if (keyType != null) entry.setObject(count++, key); if (valueType != null) entry.setObject(count, value); return true; } @Override public void sink(FlowProcess<? extends Configuration> flowProcess, SinkCall<Void, OutputCollector> sinkCall) throws IOException { TupleEntry tupleEntry = sinkCall.getOutgoingEntry(); Writable keyValue = NullWritable.get(); Writable valueValue = NullWritable.get(); if (keyType == null) { valueValue = (Writable) tupleEntry.getObject(0); } else if (valueType == null) { keyValue = (Writable) tupleEntry.getObject(0); } else { keyValue = (Writable) tupleEntry.getObject(0); valueValue = (Writable) tupleEntry.getObject(1); } sinkCall.getOutput().collect(keyValue, valueValue); } @Override public boolean equals(Object object) { if (this == object) return true; if (!(object instanceof WritableSequenceFile)) return false; if (!super.equals(object)) return false; WritableSequenceFile that = (WritableSequenceFile) object; if (keyType != null ? !keyType.equals(that.keyType) : that.keyType != null) return false; if (valueType != null ? !valueType.equals(that.valueType) : that.valueType != null) return false; return true; } @Override public int hashCode() { int result = super.hashCode(); result = 31 * result + (keyType != null ? keyType.hashCode() : 0); result = 31 * result + (valueType != null ? valueType.hashCode() : 0); return result; } }