org.apache.accumulo.test.CountNameNodeOpsBulkIT.java Source code

Introduction

Here is the source code for org.apache.accumulo.test.CountNameNodeOpsBulkIT.java, an Accumulo integration test that counts the FileInfo calls made to the HDFS NameNode during bulk import and compares the bulk import API introduced in 2.0 against the old one.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.test;

import static org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly;
import static org.junit.Assert.assertEquals;

import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.crypto.CryptoServiceFactory;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.file.rfile.RFile;
import org.apache.accumulo.core.master.thrift.MasterMonitorInfo;
import org.apache.accumulo.minicluster.ServerType;
import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
import org.apache.accumulo.test.functional.ConfigurableMacBase;
import org.apache.accumulo.test.functional.FunctionalTestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import com.google.gson.Gson;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * Originally written for ACCUMULO-3949 and ACCUMULO-3953 to count the number of FileInfo calls to
 * the NameNode. Updated in 2.0 to count the calls made by the new bulk import and compare them to
 * the old implementation's.
 */
public class CountNameNodeOpsBulkIT extends ConfigurableMacBase {

    @Override
    protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
        cfg.setNumTservers(1);
        cfg.useMiniDFS(true);
    }

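    // Fetch the NameNode's JMX dump over HTTP and return the
    // Hadoop:service=NameNode,name=NameNodeActivity bean as a map (empty if absent).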
    @SuppressFBWarnings(value = { "PATH_TRAVERSAL_IN",
            "URLCONNECTION_SSRF_FD" }, justification = "path provided by test; url provided by test")
    private Map<?, ?> getStats() throws Exception {
        String uri = getCluster().getMiniDfs().getHttpUri(0);
        URL url = new URL(uri + "/jmx");
        log.debug("Fetching web page " + url);
        String jsonString = FunctionalTestUtils.readAll(url.openStream());
        Gson gson = new Gson();
        Map<?, ?> jsonObject = gson.fromJson(jsonString, Map.class);
        List<?> beans = (List<?>) jsonObject.get("beans");
        for (Object bean : beans) {
            Map<?, ?> map = (Map<?, ?>) bean;
            if (map.get("name").toString().equals("Hadoop:service=NameNode,name=NameNodeActivity")) {
                return map;
            }
        }
        return new HashMap<>(0);
    }

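    // Gson deserializes JSON numbers into a Map as Doubles, so read the stat
    // through Double before truncating to long.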
    private long getStat(Map<?, ?> map, String stat) {
        return (long) Double.parseDouble(map.get(stat).toString());
    }

    @Test
    public void compareOldNewBulkImportTest() throws Exception {
        try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) {
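            // Stop the Accumulo garbage collector; its periodic file scans would
            // otherwise add their own NameNode operations to the counts below.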
            getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
            final String tableName = getUniqueNames(1)[0];
            c.tableOperations().create(tableName);
            // turn off compactions
            c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000");
            c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000");
            // splits to slow down bulk import
            SortedSet<Text> splits = new TreeSet<>();
            for (int i = 1; i < 0xf; i++) {
                splits.add(new Text(Integer.toHexString(i)));
            }
            c.tableOperations().addSplits(tableName, splits);

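            // Sanity check: configure() pinned the cluster to a single tablet server.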
            MasterMonitorInfo stats = getCluster().getMasterMonitorInfo();
            assertEquals(1, stats.tServerInfo.size());

            log.info("Creating lots of bulk import files");
            final FileSystem fs = getCluster().getFileSystem();
            final Path basePath = getCluster().getTemporaryPath();

            final Path base = new Path(basePath, "testBulkLoad" + tableName);
            fs.delete(base, true);
            fs.mkdirs(base);

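            // Write 10 directories of 100 small RFiles in parallel. Every file
            // covers rows 0x100 through 0xffd, so each import overlaps all of the splits.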
            ExecutorService es = Executors.newFixedThreadPool(5);
            List<Future<String>> futures = new ArrayList<>();
            for (int i = 0; i < 10; i++) {
                final int which = i;
                futures.add(es.submit(() -> {
                    Path files = new Path(base, "files" + which);
                    fs.mkdirs(files);
                    for (int fileNum = 0; fileNum < 100; fileNum++) {
                        FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
                                .forFile(files + "/bulk_" + fileNum + "." + RFile.EXTENSION, fs, fs.getConf(),
                                        CryptoServiceFactory.newDefaultInstance())
                                .withTableConfiguration(DefaultConfiguration.getInstance()).build();
                        writer.startDefaultLocalityGroup();
                        for (int j = 0x100; j < 0xfff; j += 3) {
                            writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
                        }
                        writer.close();
                    }
                    return files.toString();
                }));
            }
            List<String> dirs = new ArrayList<>();
            for (Future<String> f : futures) {
                dirs.add(f.get());
            }
            log.info("Importing");
            long startOps = getStat(getStats(), "FileInfoOps");
            long now = System.currentTimeMillis();
            List<Future<Object>> errs = new ArrayList<>();
            for (String dir : dirs) {
                errs.add(es.submit(() -> {
                    c.tableOperations().importDirectory(dir).to(tableName).load();
                    return null;
                }));
            }
            for (Future<Object> err : errs) {
                err.get();
            }
            es.shutdown();
            es.awaitTermination(2, TimeUnit.MINUTES);
            log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
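            // Pause so the NameNode's metrics reflect all of the completed imports.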
            sleepUninterruptibly(30, TimeUnit.SECONDS);
            Map<?, ?> map = getStats();
            map.forEach((k, v) -> {
                try {
                    if (v != null && Double.parseDouble(v.toString()) > 0.0)
                        log.debug("{}:{}", k, v);
                } catch (NumberFormatException e) {
                    // only looking for numbers
                }
            });
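            // The difference between the current counter and the pre-import snapshot
            // is the NameNode cost of the ten imports.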
            long fileInfoOps = getStat(map, "FileInfoOps") - startOps;
            log.info("New bulk import used {} FileInfoOps, vs the old implementation's ~2060", fileInfoOps);
            // Counts for the old bulk import: the expected number of FileInfoOps
            // was between 1000 and 2100. The new bulk import is far cheaper.
            assertEquals("unexpected number of FileInfoOps", 20, fileInfoOps);
        }
    }
}
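
Usage

The call at the center of the measurement is the bulk import API introduced in Accumulo 2.0. Below is a minimal standalone sketch of that API; the client properties path, directory, and table name are hypothetical placeholders, and the table must already exist.

import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;

public class BulkImportExample {
    public static void main(String[] args) throws Exception {
        try (AccumuloClient client =
                Accumulo.newClient().from("/path/to/accumulo-client.properties").build()) {
            // Import every RFile under the directory into the (pre-existing) table.
            client.tableOperations().importDirectory("/tmp/bulk/files0").to("myTable").load();
        }
    }
}

Because the class name ends in IT, the test itself would normally run through the Maven failsafe plugin rather than surefire, e.g. mvn verify -Dit.test=CountNameNodeOpsBulkIT from Accumulo's test module (assuming the project's standard failsafe configuration).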