eu.stratosphere.pact.runtime.sort.ExternalSortITCase.java Source code

Java tutorial

Introduction

Here is the source code for eu.stratosphere.pact.runtime.sort.ExternalSortITCase.java

Source

/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.pact.runtime.sort;

import java.util.Comparator;

import junit.framework.Assert;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializerFactory;
import eu.stratosphere.nephele.services.iomanager.IOManager;
import eu.stratosphere.nephele.services.memorymanager.MemoryManager;
import eu.stratosphere.nephele.services.memorymanager.spi.DefaultMemoryManager;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordComparator;
import eu.stratosphere.api.java.typeutils.runtime.record.RecordSerializerFactory;
import eu.stratosphere.pact.runtime.test.util.DummyInvokable;
import eu.stratosphere.pact.runtime.test.util.RandomIntPairGenerator;
import eu.stratosphere.pact.runtime.test.util.TestData;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.KeyMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Generator.ValueMode;
import eu.stratosphere.pact.runtime.test.util.TestData.Key;
import eu.stratosphere.pact.runtime.test.util.TestData.Value;
import eu.stratosphere.pact.runtime.test.util.types.IntPair;
import eu.stratosphere.pact.runtime.test.util.types.IntPairComparator;
import eu.stratosphere.pact.runtime.test.util.types.IntPairSerializer;
import eu.stratosphere.types.Record;
import eu.stratosphere.util.MutableObjectIterator;

public class ExternalSortITCase {

    private static final Log LOG = LogFactory.getLog(ExternalSortITCase.class);

    private static final long SEED = 649180756312423613L;

    private static final int KEY_MAX = Integer.MAX_VALUE;

    private static final int VALUE_LENGTH = 114;

    private static final Value VAL = new Value(
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

    private static final int NUM_PAIRS = 200000;

    private static final int MEMORY_SIZE = 1024 * 1024 * 78;

    private final AbstractTask parentTask = new DummyInvokable();

    private IOManager ioManager;

    private MemoryManager memoryManager;

    private TypeSerializerFactory<Record> pactRecordSerializer;

    private TypeComparator<Record> pactRecordComparator;

    // --------------------------------------------------------------------------------------------

    @SuppressWarnings("unchecked")
    @Before
    public void beforeTest() {
        this.memoryManager = new DefaultMemoryManager(MEMORY_SIZE);
        this.ioManager = new IOManager();

        this.pactRecordSerializer = RecordSerializerFactory.get();
        this.pactRecordComparator = new RecordComparator(new int[] { 0 }, new Class[] { TestData.Key.class });
    }

    @After
    public void afterTest() {
        this.ioManager.shutdown();
        if (!this.ioManager.isProperlyShutDown()) {
            Assert.fail("I/O Manager was not properly shut down.");
        }

        if (this.memoryManager != null) {
            Assert.assertTrue("Memory leak: not all segments have been returned to the memory manager.",
                    this.memoryManager.verifyEmpty());
            this.memoryManager.shutdown();
            this.memoryManager = null;
        }
    }

    // --------------------------------------------------------------------------------------------

    @Test
    public void testInMemorySort() throws Exception {
        // comparator
        final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

        final TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM,
                ValueMode.CONSTANT, VAL);
        final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, NUM_PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Record> merger = new UnilateralSortMerger<Record>(this.memoryManager, this.ioManager, source,
                this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, 64 * 1024 * 1024, 2, 0.9f);

        // emit data
        LOG.debug("Reading and sorting data...");

        // check order
        MutableObjectIterator<Record> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsEmitted = 1;

        Record rec1 = new Record();
        Record rec2 = new Record();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            final Key k1 = rec1.getField(0, TestData.Key.class);
            final Key k2 = rec2.getField(0, TestData.Key.class);
            pairsEmitted++;

            Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

            Record tmp = rec1;
            rec1 = rec2;
            k1.setKey(k2.getKey());

            rec2 = tmp;
        }
        Assert.assertTrue(NUM_PAIRS == pairsEmitted);

        merger.close();
    }

    @Test
    public void testInMemorySortUsing10Buffers() throws Exception {
        // comparator
        final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

        final TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM,
                ValueMode.CONSTANT, VAL);
        final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, NUM_PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Record> merger = new UnilateralSortMerger<Record>(this.memoryManager, this.ioManager, source,
                this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, 64 * 1024 * 1024, 10, 2,
                0.9f);

        // emit data
        LOG.debug("Reading and sorting data...");

        // check order
        MutableObjectIterator<Record> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsEmitted = 1;

        Record rec1 = new Record();
        Record rec2 = new Record();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            final Key k1 = rec1.getField(0, TestData.Key.class);
            final Key k2 = rec2.getField(0, TestData.Key.class);
            pairsEmitted++;

            Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

            Record tmp = rec1;
            rec1 = rec2;
            k1.setKey(k2.getKey());

            rec2 = tmp;
        }
        Assert.assertTrue(NUM_PAIRS == pairsEmitted);

        merger.close();
    }

    @Test
    public void testSpillingSort() throws Exception {
        // comparator
        final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

        final TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM,
                ValueMode.CONSTANT, VAL);
        final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, NUM_PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Record> merger = new UnilateralSortMerger<Record>(this.memoryManager, this.ioManager, source,
                this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, 16 * 1024 * 1024, 64, 0.7f);

        // emit data
        LOG.debug("Reading and sorting data...");

        // check order
        MutableObjectIterator<Record> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsEmitted = 1;

        Record rec1 = new Record();
        Record rec2 = new Record();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            final Key k1 = rec1.getField(0, TestData.Key.class);
            final Key k2 = rec2.getField(0, TestData.Key.class);
            pairsEmitted++;

            Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

            Record tmp = rec1;
            rec1 = rec2;
            k1.setKey(k2.getKey());

            rec2 = tmp;
        }
        Assert.assertTrue(NUM_PAIRS == pairsEmitted);

        merger.close();
    }

    @Test
    public void testSpillingSortWithIntermediateMerge() throws Exception {
        // amount of pairs
        final int PAIRS = 10000000;

        // comparator
        final Comparator<TestData.Key> keyComparator = new TestData.KeyComparator();

        final TestData.Generator generator = new TestData.Generator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM,
                ValueMode.FIX_LENGTH);
        final MutableObjectIterator<Record> source = new TestData.GeneratorIterator(generator, PAIRS);

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<Record> merger = new UnilateralSortMerger<Record>(this.memoryManager, this.ioManager, source,
                this.parentTask, this.pactRecordSerializer, this.pactRecordComparator, 64 * 1024 * 1024, 16, 0.7f);

        // emit data
        LOG.debug("Emitting data...");

        // check order
        MutableObjectIterator<Record> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsRead = 1;
        int nextStep = PAIRS / 20;

        Record rec1 = new Record();
        Record rec2 = new Record();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
        while ((rec2 = iterator.next(rec2)) != null) {
            final Key k1 = rec1.getField(0, TestData.Key.class);
            final Key k2 = rec2.getField(0, TestData.Key.class);
            pairsRead++;

            Assert.assertTrue(keyComparator.compare(k1, k2) <= 0);

            Record tmp = rec1;
            rec1 = rec2;
            k1.setKey(k2.getKey());
            rec2 = tmp;

            // log
            if (pairsRead == nextStep) {
                nextStep += PAIRS / 20;
            }

        }
        Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
        merger.close();
    }

    @Test
    public void testSpillingSortWithIntermediateMergeIntPair() throws Exception {
        // amount of pairs
        final int PAIRS = 50000000;

        // comparator
        final RandomIntPairGenerator generator = new RandomIntPairGenerator(12345678, PAIRS);

        final TypeSerializerFactory<IntPair> serializerFactory = new IntPairSerializer.IntPairSerializerFactory();
        final TypeComparator<IntPair> comparator = new IntPairComparator();

        // merge iterator
        LOG.debug("Initializing sortmerger...");

        Sorter<IntPair> merger = new UnilateralSortMerger<IntPair>(this.memoryManager, this.ioManager, generator,
                this.parentTask, serializerFactory, comparator, 64 * 1024 * 1024, 4, 0.7f);

        // emit data
        LOG.debug("Emitting data...");

        // check order
        MutableObjectIterator<IntPair> iterator = merger.getIterator();

        LOG.debug("Checking results...");
        int pairsRead = 1;
        int nextStep = PAIRS / 20;

        IntPair rec1 = new IntPair();
        IntPair rec2 = new IntPair();

        Assert.assertTrue((rec1 = iterator.next(rec1)) != null);

        while ((rec2 = iterator.next(rec2)) != null) {
            final int k1 = rec1.getKey();
            final int k2 = rec2.getKey();
            pairsRead++;

            Assert.assertTrue(k1 - k2 <= 0);

            IntPair tmp = rec1;
            rec1 = rec2;
            rec2 = tmp;

            // log
            if (pairsRead == nextStep) {
                nextStep += PAIRS / 20;
            }
        }
        Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
        merger.close();
    }
}