voldemort.store.readonly.mr.HadoopStoreBuilderTest.java Source code

Introduction

Here is the source code for voldemort.store.readonly.mr.HadoopStoreBuilderTest.java
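The class is a parameterized JUnit test for Voldemort's HadoopStoreBuilder. It builds read-only stores from tab-separated text input, verifies the generated node directories, metadata files, and MD5 checksums, and then reads the data back through both binary-search and interpolation-search strategies, with and without saved keys.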

Source

/*
 * Copyright 2008-2009 LinkedIn, Inc
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package voldemort.store.readonly.mr;

import static org.junit.Assert.fail;

import java.io.File;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import voldemort.ServerTestUtils;
import voldemort.TestUtils;
import voldemort.client.RoutingTier;
import voldemort.cluster.Cluster;
import voldemort.routing.RoutingStrategyFactory;
import voldemort.routing.RoutingStrategyType;
import voldemort.serialization.DefaultSerializerFactory;
import voldemort.serialization.Serializer;
import voldemort.serialization.SerializerDefinition;
import voldemort.store.Store;
import voldemort.store.StoreDefinition;
import voldemort.store.StoreDefinitionBuilder;
import voldemort.store.readonly.BinarySearchStrategy;
import voldemort.store.readonly.InterpolationSearchStrategy;
import voldemort.store.readonly.ReadOnlyStorageConfiguration;
import voldemort.store.readonly.ReadOnlyStorageEngine;
import voldemort.store.readonly.ReadOnlyStorageFormat;
import voldemort.store.readonly.ReadOnlyStorageMetadata;
import voldemort.store.readonly.SearchStrategy;
import voldemort.store.readonly.checksum.CheckSum;
import voldemort.store.readonly.checksum.CheckSumTests;
import voldemort.store.readonly.checksum.CheckSum.CheckSumType;
import voldemort.store.readonly.fetcher.HdfsFetcher;
import voldemort.store.serialized.SerializingStore;
import voldemort.utils.ByteArray;
import voldemort.utils.ByteUtils;
import voldemort.utils.ClosableIterator;
import voldemort.utils.Pair;
import voldemort.versioning.Versioned;

/**
 * Unit test to check the read-only batch indexer. <strong>In local mode the
 * number of reducers is always one, so we will see files for only one node
 * irrespective of the cluster details.</strong>
 */
@RunWith(Parameterized.class)
@SuppressWarnings("deprecation")
public class HadoopStoreBuilderTest {

    private SearchStrategy searchStrategy;
    private boolean saveKeys;

    // Run every test under each combination of search strategy and saveKeys flag.
    @Parameters
    public static Collection<Object[]> configs() {
        return Arrays.asList(
                new Object[][] { { new BinarySearchStrategy(), true }, { new InterpolationSearchStrategy(), true },
                        { new BinarySearchStrategy(), false }, { new InterpolationSearchStrategy(), false } });
    }

    public HadoopStoreBuilderTest(SearchStrategy searchStrategy, boolean saveKeys) {
        this.saveKeys = saveKeys;
        this.searchStrategy = searchStrategy;
    }

    // Splits each whitespace-separated input line into a key (first token) and a
    // value (second token).
    public static class TextStoreMapper extends AbstractHadoopStoreBuilderMapper<LongWritable, Text> {

        @Override
        public Object makeKey(LongWritable key, Text value) {
            String[] tokens = value.toString().split("\\s+");
            return tokens[0];
        }

        @Override
        public Object makeValue(LongWritable key, Text value) {
            String[] tokens = value.toString().split("\\s+");
            return tokens[1];
        }

    }

    /**
     * Issue 258: a 'node--1' directory was produced during store building when
     * some reducer did not receive any data.
     * 
     * @throws Exception
     */
    @Test
    public void testRowsLessThanNodes() throws Exception {
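        // Note: 'values' is deliberately left empty so that the reducers receive
        // no data, reproducing the Issue 258 scenario described above.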
        Map<String, String> values = new HashMap<String, String>();
        File testDir = TestUtils.createTempDir();
        File tempDir = new File(testDir, "temp");
        File outputDir = new File(testDir, "output");

        // write the (empty) test data to a text file
        File inputFile = File.createTempFile("input", ".txt", testDir);
        inputFile.deleteOnExit();
        StringBuilder contents = new StringBuilder();
        for (Map.Entry<String, String> entry : values.entrySet())
            contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
        FileUtils.writeStringToFile(inputFile, contents.toString());

        String storeName = "test";
        SerializerDefinition serDef = new SerializerDefinition("string");
        Cluster cluster = ServerTestUtils.getLocalCluster(10);

        // Test backwards compatibility
        StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
                .setType(ReadOnlyStorageConfiguration.TYPE_NAME).setKeySerializer(serDef).setValueSerializer(serDef)
                .setRoutingPolicy(RoutingTier.CLIENT)
                .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY).setReplicationFactor(1)
                .setPreferredReads(1).setRequiredReads(1).setPreferredWrites(1).setRequiredWrites(1).build();
        HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(), TextStoreMapper.class,
                TextInputFormat.class, cluster, def, 64 * 1024, new Path(tempDir.getAbsolutePath()),
                new Path(outputDir.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5,
                saveKeys, false);
        builder.build();

        // Should not produce a node--1 directory, and should have one folder for
        // every node
        Assert.assertEquals(cluster.getNumberOfNodes(), outputDir.listFiles().length);
        for (File f : outputDir.listFiles()) {
            Assert.assertFalse(f.toString().contains("node--1"));
        }

        // Check if individual nodes exist, along with their metadata file
        for (int nodeId = 0; nodeId < 10; nodeId++) {
            File nodeFile = new File(outputDir, "node-" + Integer.toString(nodeId));
            Assert.assertTrue(nodeFile.exists());
            Assert.assertTrue(new File(nodeFile, ".metadata").exists());
        }
    }

    @Test
    public void testHadoopBuild() throws Exception {
        // create test data
        Map<String, String> values = new HashMap<String, String>();
        File testDir = TestUtils.createTempDir();
        File tempDir = new File(testDir, "temp"), tempDir2 = new File(testDir, "temp2");
        File outputDir = new File(testDir, "output"), outputDir2 = new File(testDir, "output2");
        File storeDir = TestUtils.createTempDir(testDir);
        for (int i = 0; i < 200; i++)
            values.put(Integer.toString(i), Integer.toBinaryString(i));

        // write test data to text file
        File inputFile = File.createTempFile("input", ".txt", testDir);
        inputFile.deleteOnExit();
        StringBuilder contents = new StringBuilder();
        for (Map.Entry<String, String> entry : values.entrySet())
            contents.append(entry.getKey() + "\t" + entry.getValue() + "\n");
        FileUtils.writeStringToFile(inputFile, contents.toString());

        String storeName = "test";
        SerializerDefinition serDef = new SerializerDefinition("string");
        Cluster cluster = ServerTestUtils.getLocalCluster(1);

        // Test backwards compatibility
        StoreDefinition def = new StoreDefinitionBuilder().setName(storeName)
                .setType(ReadOnlyStorageConfiguration.TYPE_NAME).setKeySerializer(serDef).setValueSerializer(serDef)
                .setRoutingPolicy(RoutingTier.CLIENT)
                .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY).setReplicationFactor(1)
                .setPreferredReads(1).setRequiredReads(1).setPreferredWrites(1).setRequiredWrites(1).build();
        HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(), TextStoreMapper.class,
                TextInputFormat.class, cluster, def, 64 * 1024, new Path(tempDir2.getAbsolutePath()),
                new Path(outputDir2.getAbsolutePath()), new Path(inputFile.getAbsolutePath()), CheckSumType.MD5,
                saveKeys, false);
        builder.build();

        builder = new HadoopStoreBuilder(new Configuration(), TextStoreMapper.class, TextInputFormat.class, cluster,
                def, 64 * 1024, new Path(tempDir.getAbsolutePath()), new Path(outputDir.getAbsolutePath()),
                new Path(inputFile.getAbsolutePath()), CheckSumType.MD5, saveKeys, false);
        builder.build();

        // Locate the node directory whose metadata and checksum will be verified
        File nodeFile = new File(outputDir, "node-0");

        // Check if metadata file exists
        File metadataFile = new File(nodeFile, ".metadata");
        Assert.assertTrue(metadataFile.exists());

        ReadOnlyStorageMetadata metadata = new ReadOnlyStorageMetadata(metadataFile);
        if (saveKeys)
            Assert.assertEquals(ReadOnlyStorageFormat.READONLY_V2.getCode(),
                    metadata.get(ReadOnlyStorageMetadata.FORMAT));
        else
            Assert.assertEquals(ReadOnlyStorageFormat.READONLY_V1.getCode(),
                    metadata.get(ReadOnlyStorageMetadata.FORMAT));

        Assert.assertEquals(CheckSum.toString(CheckSumType.MD5),
                metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE));

        // Check contents of checkSum file
        byte[] md5 = Hex.decodeHex(((String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM)).toCharArray());
        byte[] checkSumBytes = CheckSumTests.calculateCheckSum(nodeFile.listFiles(), CheckSumType.MD5);
        Assert.assertEquals(0, ByteUtils.compare(checkSumBytes, md5));

        // check if fetching works
        HdfsFetcher fetcher = new HdfsFetcher();

        // Fetch to version directory
        File versionDir = new File(storeDir, "version-0");
        fetcher.fetch(nodeFile.getAbsolutePath(), versionDir.getAbsolutePath());
        Assert.assertTrue(versionDir.exists());

        // open store
        @SuppressWarnings("unchecked")
        Serializer<Object> serializer = (Serializer<Object>) new DefaultSerializerFactory().getSerializer(serDef);
        ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine(storeName, searchStrategy,
                new RoutingStrategyFactory().updateRoutingStrategy(def, cluster), 0, storeDir, 1);
        Store<Object, Object, Object> store = SerializingStore.wrap(engine, serializer, serializer, serializer);

        // check values
        for (Map.Entry<String, String> entry : values.entrySet()) {
            List<Versioned<Object>> found = store.get(entry.getKey(), null);
            Assert.assertEquals("Incorrect number of results", 1, found.size());
            Assert.assertEquals(entry.getValue(), found.get(0).getValue());
        }

        // also check the iterator - first key iterator...
        try {
            ClosableIterator<ByteArray> keyIterator = engine.keys();
            if (!saveKeys) {
                fail("Should have thrown an exception since this RO format does not support iterators");
            }
            int numElements = 0;
            while (keyIterator.hasNext()) {
                Assert.assertTrue(values.containsKey(serializer.toObject(keyIterator.next().get())));
                numElements++;
            }

            Assert.assertEquals(values.size(), numElements);
        } catch (UnsupportedOperationException e) {
            if (saveKeys) {
                fail("Should not have thrown an exception since this RO format does support iterators");
            }
        }

        // ... and entry iterator
        try {
            ClosableIterator<Pair<ByteArray, Versioned<byte[]>>> entryIterator = engine.entries();
            if (!saveKeys) {
                fail("Should have thrown an exception since this RO format does not support iterators");
            }
            int numElements = 0;
            while (entryIterator.hasNext()) {
                Pair<ByteArray, Versioned<byte[]>> entry = entryIterator.next();
                Assert.assertEquals(values.get(serializer.toObject(entry.getFirst().get())),
                        serializer.toObject(entry.getSecond().getValue()));
                numElements++;
            }

            Assert.assertEquals(values.size(), numElements);
        } catch (UnsupportedOperationException e) {
            if (saveKeys) {
                fail("Should not have thrown an exception since this RO format does support iterators");
            }
        }
    }
}
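
Example

For quick reference, the build-and-open flow exercised by testHadoopBuild above can be distilled into a short sketch. This is an illustration only, not part of the original file: the paths are hypothetical, and it reuses the HadoopStoreBuilder constructor, store definition, and engine signature exactly as they appear in the test, with the imports from the listing above.

// Build a read-only store from a tab-separated text file, then open it.
SerializerDefinition serDef = new SerializerDefinition("string");
Cluster cluster = ServerTestUtils.getLocalCluster(1);
StoreDefinition def = new StoreDefinitionBuilder().setName("test")
        .setType(ReadOnlyStorageConfiguration.TYPE_NAME)
        .setKeySerializer(serDef).setValueSerializer(serDef)
        .setRoutingPolicy(RoutingTier.CLIENT)
        .setRoutingStrategyType(RoutingStrategyType.CONSISTENT_STRATEGY)
        .setReplicationFactor(1)
        .setPreferredReads(1).setRequiredReads(1)
        .setPreferredWrites(1).setRequiredWrites(1).build();

// Same constructor call as in the test; 64 * 1024 is the chunk size in bytes.
HadoopStoreBuilder builder = new HadoopStoreBuilder(new Configuration(),
        HadoopStoreBuilderTest.TextStoreMapper.class, TextInputFormat.class,
        cluster, def, 64 * 1024,
        new Path("/tmp/ro-temp"),    // hypothetical temp directory
        new Path("/tmp/ro-output"),  // hypothetical output directory
        new Path("/tmp/input.txt"),  // hypothetical "key<TAB>value" input file
        CheckSumType.MD5,
        true,                        // saveKeys: emit the READONLY_V2 format
        false);                      // final flag, passed as false in both test invocations
builder.build();

// After fetching node-0 into <storeDir>/version-0 (as the test does with
// HdfsFetcher), open the store with a binary search strategy.
ReadOnlyStorageEngine engine = new ReadOnlyStorageEngine("test",
        new BinarySearchStrategy(),
        new RoutingStrategyFactory().updateRoutingStrategy(def, cluster),
        0, new File("/tmp/ro-store"), 1);  // hypothetical store directory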