org.apache.accumulo.test.functional.BulkLoadIT.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.accumulo.test.functional.BulkLoadIT.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.test.functional;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.fail;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.impl.Table;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.file.rfile.RFile;
import org.apache.accumulo.core.metadata.schema.MetadataScanner;
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.harness.AccumuloClusterHarness;
import org.apache.accumulo.minicluster.MemoryUnit;
import org.apache.accumulo.minicluster.ServerType;
import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
import org.apache.accumulo.server.conf.ServerConfigurationFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RawLocalFileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;

/**
 * Tests new bulk import technique. If the old technique ever gets removed this will replace
 * {@link BulkFileIT}
 *
 * <p>
 * Every test writes one or more RFiles of zero-padded four digit rows into a fresh directory,
 * bulk loads that directory into a freshly created table, then checks both the table contents
 * and which files each tablet references in the metadata table.
 *
 * @since 2.0
 */
public class BulkLoadIT extends AccumuloClusterHarness {

    /** Name of the table created for the currently running test. */
    private String tableName;
    /** System configuration handed to the RFile writer. */
    private AccumuloConfiguration aconf;
    /** File system of the cluster under test. */
    private FileSystem fs;
    /** Temporary path of the cluster; bulk directories are created beneath it. */
    private String rootPath;

    /**
     * Gives the tablet servers 512MB of memory and switches the "file" scheme to
     * {@link RawLocalFileSystem}.
     */
    @Override
    public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration conf) {
        cfg.setMemory(ServerType.TABLET_SERVER, 128 * 4, MemoryUnit.MEGABYTE);

        // use raw local file system
        conf.set("fs.file.impl", RawLocalFileSystem.class.getName());
    }

    /**
     * @return the per-test timeout, four minutes
     */
    @Override
    protected int defaultTimeoutSeconds() {
        return 4 * 60;
    }

    /**
     * Creates the table used by the test and caches the configuration, file system and temporary
     * root path the helpers below rely on.
     */
    @Before
    public void setupBulkTest() throws Exception {
        Connector c = getConnector();
        tableName = getUniqueNames(1)[0];
        c.tableOperations().create(tableName);
        aconf = new ServerConfigurationFactory(c.getInstance()).getSystemConfiguration();
        fs = getCluster().getFileSystem();
        rootPath = cluster.getTemporaryPath().toString();
    }

    /**
     * Builds the name of a bulk directory under the temporary root and recursively deletes
     * anything already present at that location.
     *
     * @param testName
     *            prefix appended to the root path (callers pass a leading "/" and trailing "-")
     * @return the directory name; the directory itself is not created here
     */
    private String getDir(String testName) throws Exception {
        String dir = rootPath + testName + getUniqueNames(1)[0];
        fs.delete(new Path(dir), true);
        return dir;
    }

    /**
     * Loads a single file holding rows 0000-0332 into a table split at 0333 and expects the file
     * to show up only in the first tablet.
     *
     * @param offline
     *            when true the table is taken offline for the load and brought back online
     *            before verification
     */
    private void testSingleTabletSingleFile(boolean offline) throws Exception {
        Connector c = getConnector();
        addSplits(tableName, "0333");

        if (offline) {
            c.tableOperations().offline(tableName);
        }

        // The prefix used to be copied from the NoSplits variant; label it after this test.
        String dir = getDir("/testSingleTabletSingleFile-");

        String h1 = writeData(dir + "/f1.", aconf, 0, 332);

        c.tableOperations().addFilesTo(tableName).from(dir).load();

        if (offline) {
            c.tableOperations().online(tableName);
        }

        verifyData(tableName, 0, 332);
        verifyMetadata(tableName, ImmutableMap.of("0333", ImmutableSet.of(h1), "null", ImmutableSet.of()));
    }

    @Test
    public void testSingleTabletSingleFile() throws Exception {
        testSingleTabletSingleFile(false);
    }

    @Test
    public void testSingleTabletSingleFileOffline() throws Exception {
        testSingleTabletSingleFile(true);
    }

    /**
     * Loads a single file holding rows 0000-0333 into a table without splits and expects the
     * file to be referenced by the only tablet.
     *
     * @param offline
     *            when true the table is taken offline for the load and brought back online
     *            before verification
     */
    private void testSingleTabletSingleFileNoSplits(boolean offline) throws Exception {
        Connector c = getConnector();

        if (offline) {
            c.tableOperations().offline(tableName);
        }

        String dir = getDir("/testSingleTabletSingleFileNoSplits-");

        String h1 = writeData(dir + "/f1.", aconf, 0, 333);

        c.tableOperations().addFilesTo(tableName).from(dir).load();

        if (offline) {
            c.tableOperations().online(tableName);
        }

        verifyData(tableName, 0, 333);
        verifyMetadata(tableName, ImmutableMap.of("null", ImmutableSet.of(h1)));
    }

    @Test
    public void testSingleTabletSingleFileNoSplits() throws Exception {
        testSingleTabletSingleFileNoSplits(false);
    }

    @Test
    public void testSingleTabletSingleFileNoSplitsOffline() throws Exception {
        testSingleTabletSingleFileNoSplits(true);
    }

    /**
     * Attempts bulk loads first with all permissions removed from the RFile and then with a
     * read-only bulk directory. Any exception raised by the load must be caused by a
     * {@link FileNotFoundException}; the original permissions are always restored.
     *
     * <p>
     * NOTE(review): a load that throws nothing is currently accepted in both halves - confirm
     * whether that is intentional before tightening it with a {@code fail()}.
     */
    @Test
    public void testBadPermissions() throws Exception {
        Connector c = getConnector();
        addSplits(tableName, "0333");

        String dir = getDir("/testBadPermissions-");

        writeData(dir + "/f1.", aconf, 0, 333);

        Path rFilePath = new Path(dir, "f1." + RFile.EXTENSION);
        FsPermission originalPerms = fs.getFileStatus(rFilePath).getPermission();
        fs.setPermission(rFilePath, FsPermission.valueOf("----------"));
        try {
            c.tableOperations().addFilesTo(tableName).from(dir).load();
        } catch (Exception e) {
            // The FileNotFoundException may be the direct cause or wrapped one level deeper.
            // getCause() can return null, so guard before looking at the nested cause instead
            // of dying with a NullPointerException that hides the real failure.
            Throwable cause = e.getCause();
            Throwable nestedCause = cause == null ? null : cause.getCause();
            if (!(cause instanceof FileNotFoundException) && !(nestedCause instanceof FileNotFoundException)) {
                fail("Expected FileNotFoundException but threw " + cause);
            }
        } finally {
            fs.setPermission(rFilePath, originalPerms);
        }

        Path dirPath = new Path(dir);
        originalPerms = fs.getFileStatus(dirPath).getPermission();
        fs.setPermission(dirPath, FsPermission.valueOf("dr--r--r--"));
        try {
            c.tableOperations().addFilesTo(tableName).from(dir).load();
        } catch (AccumuloException ae) {
            if (!(ae.getCause() instanceof FileNotFoundException)) {
                fail("Expected FileNotFoundException but threw " + ae.getCause());
            }
        } finally {
            fs.setPermission(dirPath, originalPerms);
        }
    }

    /**
     * Loads four files whose row ranges span one or two tablets each into a table with five
     * splits, then verifies the data and the per-tablet file references.
     *
     * @param offline
     *            when true the table is taken offline for the load and brought back online
     *            before verification
     */
    private void testBulkFile(boolean offline) throws Exception {
        Connector c = getConnector();
        addSplits(tableName, "0333 0666 0999 1333 1666");

        if (offline) {
            c.tableOperations().offline(tableName);
        }

        String dir = getDir("/testBulkFile-");

        // expected file hashes keyed by tablet end row; "null" stands for the last tablet
        Map<String, Set<String>> hashes = new HashMap<>();
        for (String endRow : Arrays.asList("0333 0666 0999 1333 1666 null".split(" "))) {
            hashes.put(endRow, new HashSet<>());
        }

        // 1 Tablet 0333-null
        String h1 = writeData(dir + "/f1.", aconf, 0, 333);
        hashes.get("0333").add(h1);

        // 2 Tablets 0666-0334, 0999-0667
        String h2 = writeData(dir + "/f2.", aconf, 334, 999);
        hashes.get("0666").add(h2);
        hashes.get("0999").add(h2);

        // 2 Tablets 1333-1000, 1666-1334
        String h3 = writeData(dir + "/f3.", aconf, 1000, 1499);
        hashes.get("1333").add(h3);
        hashes.get("1666").add(h3);

        // 2 Tablets 1666-1334, >1666
        String h4 = writeData(dir + "/f4.", aconf, 1500, 1999);
        hashes.get("1666").add(h4);
        hashes.get("null").add(h4);

        c.tableOperations().addFilesTo(tableName).from(dir).load();

        if (offline) {
            c.tableOperations().online(tableName);
        }

        verifyData(tableName, 0, 1999);
        verifyMetadata(tableName, hashes);
    }

    @Test
    public void testBulkFile() throws Exception {
        testBulkFile(false);
    }

    @Test
    public void testBulkFileOffline() throws Exception {
        testBulkFile(true);
    }

    /**
     * Adds split points to a table.
     *
     * @param tableName
     *            table to split
     * @param splitString
     *            split rows separated by single spaces
     */
    private void addSplits(String tableName, String splitString) throws Exception {
        SortedSet<Text> splits = new TreeSet<>();
        for (String split : splitString.split(" ")) {
            splits.add(new Text(split));
        }
        getConnector().tableOperations().addSplits(tableName, splits);
    }

    /**
     * Scans the whole table and checks that it holds exactly the rows {@code s} through
     * {@code e} (inclusive, formatted as four digits) in order, each with its own number as the
     * value.
     *
     * @param table
     *            table to scan
     * @param s
     *            first expected row number
     * @param e
     *            last expected row number
     * @throws Exception
     *             if a row is missing, out of place, carries the wrong value, or extra entries
     *             follow the last expected row
     */
    private void verifyData(String table, int s, int e) throws Exception {
        try (Scanner scanner = getConnector().createScanner(table, Authorizations.EMPTY)) {

            Iterator<Entry<Key, Value>> iter = scanner.iterator();

            for (int i = s; i <= e; i++) {
                if (!iter.hasNext()) {
                    throw new Exception("row " + i + " not found");
                }

                Entry<Key, Value> entry = iter.next();

                String row = String.format("%04d", i);

                if (!entry.getKey().getRow().equals(new Text(row))) {
                    throw new Exception("unexpected row " + entry.getKey() + " " + i);
                }

                if (Integer.parseInt(entry.getValue().toString()) != i) {
                    throw new Exception("unexpected value " + entry + " " + i);
                }
            }

            if (iter.hasNext()) {
                throw new Exception("found more than expected " + iter.next());
            }
        }
    }

    /**
     * Scans the metadata entries of a table and asserts that every tablet has no loaded markers
     * left, that the hashes of its files equal the expected set for its end row, and that the
     * end rows encountered are exactly the expected ones.
     *
     * @param tableName
     *            table whose tablets are inspected
     * @param expectedHashes
     *            expected file hashes keyed by tablet end row; the key "null" denotes the tablet
     *            without an end row
     */
    private void verifyMetadata(String tableName, Map<String, Set<String>> expectedHashes) throws Exception {

        Set<String> endRowsSeen = new HashSet<>();

        String id = getConnector().tableOperations().tableIdMap().get(tableName);
        try (MetadataScanner scanner = MetadataScanner.builder().from(getConnector()).scanMetadataTable()
                .overRange(Table.ID.of(id)).fetchFiles().fetchLoaded().fetchPrev().build()) {
            for (TabletMetadata tablet : scanner) {
                Assert.assertTrue(tablet.getLoaded().isEmpty());

                Set<String> fileHashes = tablet.getFiles().stream().map(this::hash).collect(Collectors.toSet());

                String endRow = tablet.getEndRow() == null ? "null" : tablet.getEndRow().toString();

                Assert.assertEquals(expectedHashes.get(endRow), fileHashes);

                endRowsSeen.add(endRow);
            }

            Assert.assertEquals(expectedHashes.keySet(), endRowsSeen);
        }
    }

    /**
     * Computes the SHA1 digest of a local file.
     *
     * @param filename
     *            file to read; a leading "file:" prefix is stripped before the path is resolved
     * @return the digest as a hexadecimal string (rendered through {@link BigInteger}, so
     *         leading zeros are not preserved)
     * @throws RuntimeException
     *             wrapping any {@link IOException} or {@link NoSuchAlgorithmException}
     */
    private String hash(String filename) {
        try {
            byte[] data = Files.readAllBytes(Paths.get(filename.replaceFirst("^file:", "")));
            byte[] digest = MessageDigest.getInstance("SHA1").digest(data);
            return new BigInteger(1, digest).toString(16);
        } catch (IOException | NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes an RFile containing one entry per row in {@code s} through {@code e} (inclusive).
     * Rows are the number formatted as four digits and values are the plain decimal number.
     *
     * @param file
     *            file name without extension; {@link RFile#EXTENSION} is appended
     * @param aconf
     *            table configuration passed to the writer
     * @param s
     *            first row number
     * @param e
     *            last row number
     * @return the hash of the written file as computed by {@link #hash(String)}
     */
    private String writeData(String file, AccumuloConfiguration aconf, int s, int e) throws Exception {
        // named differently from the fs field so the field is not shadowed
        FileSystem clusterFs = getCluster().getFileSystem();
        String filename = file + RFile.EXTENSION;
        try (FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                .forFile(filename, clusterFs, clusterFs.getConf()).withTableConfiguration(aconf).build()) {
            writer.startDefaultLocalityGroup();
            for (int i = s; i <= e; i++) {
                writer.append(new Key(new Text(String.format("%04d", i))),
                        new Value(Integer.toString(i).getBytes(UTF_8)));
            }
        }

        return hash(filename);
    }
}