// Java tutorial
/* * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cascading.tuple.hadoop.util; import java.io.IOException; import java.util.Comparator; import cascading.CascadingException; import cascading.flow.hadoop.util.HadoopUtil; import cascading.tuple.Fields; import cascading.tuple.StreamComparator; import cascading.tuple.Tuple; import cascading.tuple.hadoop.TupleSerialization; import cascading.tuple.hadoop.io.BufferedInputStream; import cascading.tuple.hadoop.io.HadoopTupleInputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.RawComparator; /** Class DeserializerComparator is the base class for all Cascading comparator classes. 
*/ public abstract class DeserializerComparator<T> extends Configured implements RawComparator<T> { final BufferedInputStream lhsBuffer = new BufferedInputStream(); final BufferedInputStream rhsBuffer = new BufferedInputStream(); TupleSerialization tupleSerialization; HadoopTupleInputStream lhsStream; HadoopTupleInputStream rhsStream; Comparator[] groupComparators; @Override public void setConf(Configuration conf) { if (conf == null) return; super.setConf(conf); tupleSerialization = new TupleSerialization(conf); // get new readers so deserializers don't compete for the buffer lhsStream = new HadoopTupleInputStream(lhsBuffer, tupleSerialization.getElementReader()); rhsStream = new HadoopTupleInputStream(rhsBuffer, tupleSerialization.getElementReader()); groupComparators = deserializeComparatorsFor("cascading.group.comparator"); groupComparators = delegatingComparatorsFor(groupComparators); } Comparator[] deserializeComparatorsFor(String name) { Configuration conf = getConf(); if (conf == null) throw new IllegalStateException("no conf set"); return getFieldComparatorsFrom(conf, name); } public static Comparator[] getFieldComparatorsFrom(Configuration conf, String name) { String value = conf.get(name); if (value == null) return new Comparator[conf.getInt(name + ".size", 1)]; try { return HadoopUtil.deserializeBase64(value, conf, Fields.class).getComparators(); } catch (IOException exception) { throw new CascadingException("unable to deserialize comparators for: " + name); } } Comparator[] delegatingComparatorsFor(Comparator[] fieldComparators) { Comparator[] comparators = new Comparator[fieldComparators.length]; for (int i = 0; i < comparators.length; i++) { if (fieldComparators[i] instanceof StreamComparator) comparators[i] = new TupleElementStreamComparator((StreamComparator) fieldComparators[i]); else if (fieldComparators[i] != null) comparators[i] = new TupleElementComparator(fieldComparators[i]); else comparators[i] = new 
DelegatingTupleElementComparator(tupleSerialization); } return comparators; } final int compareTuples(Comparator[] comparators, Tuple lhs, Tuple rhs) { int lhsLen = lhs.size(); int rhsLen = rhs.size(); int c = lhsLen - rhsLen; if (c != 0) return c; for (int i = 0; i < lhsLen; i++) { // hack to support comparators array length of 1 Object lhsObject = lhs.getObject(i); Object rhsObject = rhs.getObject(i); try { c = comparators[i % comparators.length].compare(lhsObject, rhsObject); } catch (Exception exception) { throw new CascadingException("unable to compare object elements in position: " + i + " lhs: '" + lhsObject + "' rhs: '" + rhsObject + "'", exception); } if (c != 0) return c; } return 0; } final int compareTuples(Comparator[] comparators) throws IOException { int lhsLen = lhsStream.getNumElements(); int rhsLen = rhsStream.getNumElements(); int c = lhsLen - rhsLen; if (c != 0) return c; for (int i = 0; i < lhsLen; i++) { // hack to support comparators array length of 1 try { c = ((StreamComparator) comparators[i % comparators.length]).compare(lhsStream, rhsStream); } catch (Exception exception) { throw new CascadingException("unable to compare stream elements in position: " + i, exception); } if (c != 0) return c; } return 0; } }