eu.stratosphere.pact.runtime.sort.CombiningUnilateralSortMergerITCase.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.pact.runtime.sort.CombiningUnilateralSortMergerITCase.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.pact.runtime.sort;

import java.io.IOException;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.NoSuchElementException;

import junit.framework.Assert;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.api.java.record.functions.ReduceFunction;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.nephele.services.iomanager.IOManager;
import eu.stratosphere.nephele.services.memorymanager.MemoryManager;
import eu.stratosphere.nephele.services.memorymanager.spi.DefaultMemoryManager;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordComparator;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordSerializerFactory;
import eu.stratosphere.pact.runtime.test.util.DummyInvokable;
import eu.stratosphere.pact.runtime.test.util.TestData;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.KeyMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.ValueMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Key;
import eu.stratosphere.pact.runtime.util.KeyGroupedIterator;
import eu.stratosphere.types.IntValue;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.Collector;
import eu.stratosphere.util.LogUtils;
import eu.stratosphere.util.MutableObjectIterator;

public class CombiningUnilateralSortMergerITCase {

    /** Logger for the debug progress messages written by the test methods. */
    private static final Log LOG = LogFactory.getLog(CombiningUnilateralSortMergerITCase.class);

    /** Fixed seed passed to the {@code TestData.Generator} in {@code testSortAndValidate}. */
    private static final long SEED = 649180756312423613L;

    /**
     * Key bound passed to the {@code TestData.Generator}; {@code testSortAndValidate} pre-registers
     * the keys {@code 1..KEY_MAX} in its count table.
     */
    private static final int KEY_MAX = 1000;

    /** Value length passed to the {@code TestData.Generator} (used together with {@code ValueMode.FIX_LENGTH}). */
    private static final int VALUE_LENGTH = 118;

    /** Number of records generated and emitted by {@code testSortAndValidate}. */
    private static final int NUM_PAIRS = 50000;

    /**
     * Size argument for the {@code DefaultMemoryManager} created before each test.
     * NOTE(review): presumably a byte count (i.e. 256 MiB) - confirm against DefaultMemoryManager.
     */
    public static final int MEMORY_SIZE = 1024 * 1024 * 256;

    /** Dummy owner task handed to every sort-merger constructed by the tests. */
    private final AbstractTask parentTask = new DummyInvokable();

    /** I/O manager; created in {@code beforeTest()} and shut down in {@code afterTest()}. */
    private IOManager ioManager;

    /** Memory manager; created in {@code beforeTest()}, verified empty and shut down in {@code afterTest()}. */
    private MemoryManager memoryManager;

    /** Serializer factory for {@code Record}, obtained from {@code RecordSerializerFactory.get()}. */
    private TypeSerializerFactory<Record> serializerFactory;

    /** Record comparator on field 0 with key type {@code TestData.Key}; set up in {@code beforeTest()}. */
    private TypeComparator<Record> comparator;

    /**
     * One-time class setup: delegates to {@code LogUtils.initializeDefaultTestConsoleLogger()}
     * before any test in this class runs.
     */
    @BeforeClass
    public static void setup() {
        LogUtils.initializeDefaultTestConsoleLogger();
    }

    /**
     * Creates a fresh memory manager, I/O manager, record serializer factory and record
     * comparator for each test.
     *
     * The unchecked warning is suppressed because the comparator receives its key classes
     * as a raw {@code Class[]} array.
     */
    @SuppressWarnings("unchecked")
    @Before
    public void beforeTest() {
        // Resource managers first, in the same order as they are torn down in afterTest().
        memoryManager = new DefaultMemoryManager(MEMORY_SIZE);
        ioManager = new IOManager();

        // Records are compared on field 0 only, which holds a TestData.Key.
        final int[] keyPositions = { 0 };
        serializerFactory = RecordSerializerFactory.get();
        comparator = new RecordComparator(keyPositions, new Class[] { TestData.Key.class });
    }

    /**
     * Tears down the per-test resources and then reports any cleanup problem.
     *
     * Cleanup is performed before any assertion is evaluated, so a failed check can no longer
     * skip the shutdown of the memory manager. The I/O manager is null-guarded in the same way
     * as the memory manager, because this method may run even though {@code beforeTest()} did
     * not complete and left the field unset.
     */
    @After
    public void afterTest() {
        boolean ioProperlyShutDown = true;
        boolean allMemoryReturned = true;

        try {
            if (this.ioManager != null) {
                this.ioManager.shutdown();
                ioProperlyShutDown = this.ioManager.isProperlyShutDown();
            }
        } finally {
            if (this.memoryManager != null) {
                try {
                    // Must be queried before shutdown(): it checks that all segments came back.
                    allMemoryReturned = this.memoryManager.verifyEmpty();
                } finally {
                    this.memoryManager.shutdown();
                    this.memoryManager = null;
                }
            }
        }

        // Report only after everything has been released.
        Assert.assertTrue("I/O Manager was not properly shut down.", ioProperlyShutDown);
        Assert.assertTrue("Memory leak: not all segments have been returned to the memory manager.",
                allMemoryReturned);
    }

    /**
     * Feeds {@code noKeyCnt} rounds of {@code noKeys} distinct keys through a combining
     * sort-merger, every record carrying the count 1 in field 1, and verifies that the output
     * contains exactly {@code noKeys} key groups whose counts each add up to {@code noKeyCnt}.
     *
     * The merger is closed in a {@code finally} block so that a failing assertion does not leave
     * its resources allocated for {@code afterTest()} to trip over.
     *
     * @throws Exception if emitting, sorting or closing fails
     */
    @Test
    public void testCombine() throws Exception {
        final int noKeys = 100;
        final int noKeyCnt = 10000;

        MockRecordReader reader = new MockRecordReader();

        LOG.debug("initializing sortmerger");

        TestCountCombiner comb = new TestCountCombiner();

        // NOTE(review): the trailing arguments are presumably memory size, max fan-in and
        // spill threshold - confirm against CombiningUnilateralSortMerger.
        Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb, this.memoryManager, this.ioManager,
                reader, this.parentTask, this.serializerFactory, this.comparator, 64 * 1024 * 1024, 64, 0.7f);

        try {
            // The same record and key instances are reused for every emit.
            final Record rec = new Record();
            rec.setField(1, new IntValue(1));
            final TestData.Key key = new TestData.Key();

            for (int i = 0; i < noKeyCnt; i++) {
                for (int j = 0; j < noKeys; j++) {
                    key.setKey(j);
                    rec.setField(0, key);
                    reader.emit(rec);
                }
            }
            reader.close();

            MutableObjectIterator<Record> iterator = merger.getIterator();

            Iterator<Integer> result = getReducingIterator(iterator, serializerFactory.getSerializer(),
                    comparator.duplicate());

            int numGroups = 0;
            while (result.hasNext()) {
                Assert.assertEquals("Wrong total count for a key group.", noKeyCnt, result.next().intValue());
                numGroups++;
            }

            // Without this check the loop above would pass vacuously on an empty result.
            Assert.assertEquals("Wrong number of key groups in the sorted output.", noKeys, numGroups);
        } finally {
            merger.close();
        }

        // if the combiner was opened, it must have been closed
        Assert.assertTrue("Combiner open/close calls are unbalanced.", comb.opened == comb.closed);
    }

    /**
     * Same scenario as {@code testCombine()}, but the sort-merger is constructed with
     * {@code 3 * 1024 * 1024} and {@code 0.005f} instead of {@code 64 * 1024 * 1024} and
     * {@code 0.7f}. NOTE(review): presumably a small memory budget and low spill threshold that
     * force spilling, as the test name suggests - confirm against CombiningUnilateralSortMerger.
     *
     * Verifies that the output contains exactly {@code noKeys} key groups whose counts each add
     * up to {@code noKeyCnt}. The merger is closed in a {@code finally} block so that a failing
     * assertion does not leave its resources allocated.
     *
     * @throws Exception if emitting, sorting or closing fails
     */
    @Test
    public void testCombineSpilling() throws Exception {
        final int noKeys = 100;
        final int noKeyCnt = 10000;

        MockRecordReader reader = new MockRecordReader();

        LOG.debug("initializing sortmerger");

        TestCountCombiner comb = new TestCountCombiner();

        Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb, this.memoryManager, this.ioManager,
                reader, this.parentTask, this.serializerFactory, this.comparator, 3 * 1024 * 1024, 64, 0.005f);

        try {
            // The same record and key instances are reused for every emit.
            final Record rec = new Record();
            rec.setField(1, new IntValue(1));
            final TestData.Key key = new TestData.Key();

            for (int i = 0; i < noKeyCnt; i++) {
                for (int j = 0; j < noKeys; j++) {
                    key.setKey(j);
                    rec.setField(0, key);
                    reader.emit(rec);
                }
            }
            reader.close();

            MutableObjectIterator<Record> iterator = merger.getIterator();

            Iterator<Integer> result = getReducingIterator(iterator, serializerFactory.getSerializer(),
                    comparator.duplicate());

            int numGroups = 0;
            while (result.hasNext()) {
                Assert.assertEquals("Wrong total count for a key group.", noKeyCnt, result.next().intValue());
                numGroups++;
            }

            // Without this check the loop above would pass vacuously on an empty result.
            Assert.assertEquals("Wrong number of key groups in the sorted output.", noKeys, numGroups);
        } finally {
            merger.close();
        }

        // if the combiner was opened, it must have been closed
        Assert.assertTrue("Combiner open/close calls are unbalanced.", comb.opened == comb.closed);
    }

    /**
     * Emits {@code NUM_PAIRS} generated records (each with the value "1" in field 1) through a
     * combining sort-merger and validates the output in two ways:
     * <ul>
     *   <li>keys of consecutive output records are in non-descending order, and</li>
     *   <li>for every key, the counts carried by the output records add up to the number of
     *       records that were emitted for that key.</li>
     * </ul>
     *
     * The bookkeeping table is incremented per emitted record and decremented by the (possibly
     * combined) count of every output record; all entries must end at zero. The merger is closed
     * in a {@code finally} block so a failing assertion does not leave its resources allocated.
     *
     * @throws Exception if emitting, sorting or closing fails
     */
    @Test
    public void testSortAndValidate() throws Exception {
        final Hashtable<TestData.Key, Integer> countTable = new Hashtable<TestData.Key, Integer>(KEY_MAX);
        for (int i = 1; i <= KEY_MAX; i++) {
            countTable.put(new TestData.Key(i), Integer.valueOf(0));
        }

        // comparator
        final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

        // reader
        MockRecordReader reader = new MockRecordReader();

        // merge iterator
        LOG.debug("initializing sortmerger");

        TestCountCombiner2 comb = new TestCountCombiner2();

        Sorter<Record> merger = new CombiningUnilateralSortMerger<Record>(comb, this.memoryManager, this.ioManager,
                reader, this.parentTask, this.serializerFactory, this.comparator, 64 * 1024 * 1024, 2, 0.7f);

        try {
            // emit data
            LOG.debug("emitting data");
            TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM,
                    ValueMode.FIX_LENGTH);
            Record rec = new Record();
            final TestData.Value value = new TestData.Value("1");

            for (int i = 0; i < NUM_PAIRS; i++) {
                rec = generator.next(rec);
                Assert.assertNotNull("Generator did not produce a record.", rec);

                final TestData.Key key = rec.getField(0, TestData.Key.class);
                // Replace the generated value with "1" so the combiner can sum it up.
                rec.setField(1, value);
                reader.emit(rec);

                // Store a copy of the key: the record's key object is reused by the generator.
                countTable.put(new TestData.Key(key.getKey()), countTable.get(key) + 1);
            }
            reader.close();

            // check order
            MutableObjectIterator<Record> iterator = merger.getIterator();

            LOG.debug("checking results");

            Record rec1 = new Record();
            Record rec2 = new Record();

            rec1 = iterator.next(rec1);
            Assert.assertNotNull("Sorted output is empty.", rec1);
            subtractCombinedCount(countTable, rec1);

            while ((rec2 = iterator.next(rec2)) != null) {
                final Key k1 = rec1.getField(0, TestData.Key.class);
                final Key k2 = rec2.getField(0, TestData.Key.class);

                Assert.assertTrue("Output records are not in ascending key order.",
                        keyComparator.compare(k1, k2) <= 0);
                subtractCombinedCount(countTable, rec2);

                // Swap the two reuse records so rec1 always holds the previous output record.
                Record tmp = rec1;
                rec1 = rec2;
                // NOTE(review): retained from the original; looks redundant because k1 is
                // re-read from rec1 on every iteration - confirm before removing.
                k1.setKey(k2.getKey());
                rec2 = tmp;
            }

            for (Integer cnt : countTable.values()) {
                Assert.assertEquals("Emitted and received counts differ for a key.", 0, cnt.intValue());
            }
        } finally {
            merger.close();
        }

        // if the combiner was opened, it must have been closed
        Assert.assertTrue("Combiner open/close calls are unbalanced.", comb.opened == comb.closed);
    }

    /**
     * Subtracts the count carried in field 1 of {@code record} (a decimal string) from the
     * table entry of the key in field 0. The entry is stored under a fresh key copy.
     */
    private static void subtractCombinedCount(Hashtable<TestData.Key, Integer> countTable, Record record) {
        final TestData.Key key = record.getField(0, TestData.Key.class);
        final int combined = Integer.parseInt(record.getField(1, TestData.Value.class).toString());
        countTable.put(new TestData.Key(key.getKey()), countTable.get(key) - combined);
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Combiner for the {@code IntValue}-based tests: sums field 1 of all records in a group and
     * emits a single record carrying that sum. It also records whether {@code open} and
     * {@code close} were called so tests can check that the two calls are balanced.
     */
    public static class TestCountCombiner extends ReduceFunction {
        private static final long serialVersionUID = 1L;

        /** Reused holder for the summed count written into the emitted record. */
        private final IntValue count = new IntValue();

        /** Set once {@link #open(Configuration)} has been called. */
        public volatile boolean opened = false;

        /** Set once {@link #close()} has been called. */
        public volatile boolean closed = false;

        /**
         * Sums field 1 over all records of the group and emits the last record of the group with
         * field 1 replaced by the sum. An empty group emits nothing.
         *
         * @param values the records of one key group
         * @param out    receives the single combined record
         */
        @Override
        public void combine(Iterator<Record> values, Collector<Record> out) {
            Record rec = null;
            int cnt = 0;
            while (values.hasNext()) {
                rec = values.next();
                cnt += rec.getField(1, IntValue.class).getValue();
            }

            if (rec == null) {
                // Empty group: there is no record to carry a count, so emit nothing
                // instead of failing with a NullPointerException.
                return;
            }

            this.count.setValue(cnt);
            rec.setField(1, this.count);
            out.collect(rec);
        }

        /** Intentionally empty: the tests only exercise the combine path. */
        @Override
        public void reduce(Iterator<Record> values, Collector<Record> out) {
        }

        /** Records that the function was opened. */
        @Override
        public void open(Configuration parameters) throws Exception {
            opened = true;
        }

        /** Records that the function was closed. */
        @Override
        public void close() throws Exception {
            closed = true;
        }
    }

    /**
     * Combiner for the {@code TestData.Value}-based test: field 1 holds a count as a decimal
     * string; the counts of a group are summed and emitted in a newly created record. It also
     * records whether {@code open} and {@code close} were called so tests can check that the two
     * calls are balanced.
     */
    public static class TestCountCombiner2 extends ReduceFunction {
        private static final long serialVersionUID = 1L;

        /** Set once {@link #open(Configuration)} has been called. */
        public volatile boolean opened = false;

        /** Set once {@link #close()} has been called. */
        public volatile boolean closed = false;

        /**
         * Parses and sums field 1 over all records of the group, then emits a new record made of
         * the key (field 0) of the last record and the sum as a string value. An empty group
         * emits nothing.
         *
         * @param values the records of one key group
         * @param out    receives the single combined record
         */
        @Override
        public void combine(Iterator<Record> values, Collector<Record> out) {
            Record rec = null;
            int cnt = 0;
            while (values.hasNext()) {
                rec = values.next();
                cnt += Integer.parseInt(rec.getField(1, TestData.Value.class).toString());
            }

            if (rec == null) {
                // Empty group: there is no key to emit a count for, so emit nothing
                // instead of failing with a NullPointerException.
                return;
            }

            out.collect(new Record(rec.getField(0, Key.class), new TestData.Value(cnt + "")));
        }

        /** Intentionally empty: the test only exercises the combine path. */
        @Override
        public void reduce(Iterator<Record> values, Collector<Record> out) {
            // yo, nothing, mon
        }

        /** Records that the function was opened. */
        @Override
        public void open(Configuration parameters) throws Exception {
            opened = true;
        }

        /** Records that the function was closed. */
        @Override
        public void close() throws Exception {
            closed = true;
        }
    }

    /**
     * Wraps the given record stream in a {@code KeyGroupedIterator} and exposes it as an iterator
     * that yields one integer per key group: the sum of field 1 (an {@code IntValue}) over all
     * records of that group.
     *
     * @param data       the records to group
     * @param serializer serializer handed to the {@code KeyGroupedIterator}
     * @param comparator comparator handed to the {@code KeyGroupedIterator}
     * @return an iterator over the per-group sums; {@code remove()} is not supported
     */
    private static Iterator<Integer> getReducingIterator(MutableObjectIterator<Record> data,
            TypeSerializer<Record> serializer, TypeComparator<Record> comparator) {

        final KeyGroupedIterator<Record> groups = new KeyGroupedIterator<Record>(data, serializer, comparator);

        return new Iterator<Integer>() {

            // True while nextKey() has reported a group that next() has not consumed yet.
            private boolean groupPending = false;

            @Override
            public boolean hasNext() {
                if (!groupPending) {
                    try {
                        groupPending = groups.nextKey();
                    } catch (IOException ioe) {
                        throw new RuntimeException(ioe);
                    }
                }
                return groupPending;
            }

            @Override
            public Integer next() {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                groupPending = false;

                int sum = 0;
                for (Iterator<Record> members = groups.getValues(); members.hasNext();) {
                    sum += members.next().getField(1, IntValue.class).getValue();
                }
                return sum;
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }

        };
    }
}