// Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.test; import static org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly; import static org.junit.Assert.assertEquals; import java.net.URL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.accumulo.core.client.Accumulo; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.conf.DefaultConfiguration; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.crypto.CryptoServiceFactory; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.file.FileOperations; import org.apache.accumulo.core.file.FileSKVWriter; import org.apache.accumulo.core.file.rfile.RFile; import org.apache.accumulo.core.master.thrift.MasterMonitorInfo; import org.apache.accumulo.minicluster.ServerType; import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl; import 
org.apache.accumulo.test.functional.ConfigurableMacBase; import org.apache.accumulo.test.functional.FunctionalTestUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.junit.Test; import com.google.gson.Gson; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; /** * Originally written for ACCUMULO-3949 and ACCUMULO-3953 to count the number of FileInfo calls to * the NameNode. Updated in 2.0 to count the calls for new bulk import comparing it to the old. */ public class CountNameNodeOpsBulkIT extends ConfigurableMacBase { @Override protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) { cfg.setNumTservers(1); cfg.useMiniDFS(true); } @SuppressFBWarnings(value = { "PATH_TRAVERSAL_IN", "URLCONNECTION_SSRF_FD" }, justification = "path provided by test; url provided by test") private Map<?, ?> getStats() throws Exception { String uri = getCluster().getMiniDfs().getHttpUri(0); URL url = new URL(uri + "/jmx"); log.debug("Fetching web page " + url); String jsonString = FunctionalTestUtils.readAll(url.openStream()); Gson gson = new Gson(); Map<?, ?> jsonObject = gson.fromJson(jsonString, Map.class); List<?> beans = (List<?>) jsonObject.get("beans"); for (Object bean : beans) { Map<?, ?> map = (Map<?, ?>) bean; if (map.get("name").toString().equals("Hadoop:service=NameNode,name=NameNodeActivity")) { return map; } } return new HashMap<>(0); } private long getStat(Map<?, ?> map, String stat) { return (long) Double.parseDouble(map.get(stat).toString()); } @Test public void compareOldNewBulkImportTest() throws Exception { try (AccumuloClient c = Accumulo.newClient().from(getClientProperties()).build()) { getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost"); final String tableName = getUniqueNames(1)[0]; c.tableOperations().create(tableName); // turn off compactions 
c.tableOperations().setProperty(tableName, Property.TABLE_MAJC_RATIO.getKey(), "2000"); c.tableOperations().setProperty(tableName, Property.TABLE_FILE_MAX.getKey(), "2000"); // splits to slow down bulk import SortedSet<Text> splits = new TreeSet<>(); for (int i = 1; i < 0xf; i++) { splits.add(new Text(Integer.toHexString(i))); } c.tableOperations().addSplits(tableName, splits); MasterMonitorInfo stats = getCluster().getMasterMonitorInfo(); assertEquals(1, stats.tServerInfo.size()); log.info("Creating lots of bulk import files"); final FileSystem fs = getCluster().getFileSystem(); final Path basePath = getCluster().getTemporaryPath(); final Path base = new Path(basePath, "testBulkLoad" + tableName); fs.delete(base, true); fs.mkdirs(base); ExecutorService es = Executors.newFixedThreadPool(5); List<Future<String>> futures = new ArrayList<>(); for (int i = 0; i < 10; i++) { final int which = i; futures.add(es.submit(() -> { Path files = new Path(base, "files" + which); fs.mkdirs(files); for (int i1 = 0; i1 < 100; i1++) { FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder() .forFile(files + "/bulk_" + i1 + "." 
+ RFile.EXTENSION, fs, fs.getConf(), CryptoServiceFactory.newDefaultInstance()) .withTableConfiguration(DefaultConfiguration.getInstance()).build(); writer.startDefaultLocalityGroup(); for (int j = 0x100; j < 0xfff; j += 3) { writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0])); } writer.close(); } return files.toString(); })); } List<String> dirs = new ArrayList<>(); for (Future<String> f : futures) { dirs.add(f.get()); } log.info("Importing"); long startOps = getStat(getStats(), "FileInfoOps"); long now = System.currentTimeMillis(); List<Future<Object>> errs = new ArrayList<>(); for (String dir : dirs) { errs.add(es.submit(() -> { c.tableOperations().importDirectory(dir).to(tableName).load(); return null; })); } for (Future<Object> err : errs) { err.get(); } es.shutdown(); es.awaitTermination(2, TimeUnit.MINUTES); log.info(String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.)); sleepUninterruptibly(30, TimeUnit.SECONDS); Map<?, ?> map = getStats(); map.forEach((k, v) -> { try { if (v != null && Double.parseDouble(v.toString()) > 0.0) log.debug("{}:{}", k, v); } catch (NumberFormatException e) { // only looking for numbers } }); long getFileInfoOpts = getStat(map, "FileInfoOps") - startOps; log.info("New bulk import used {} opts, vs old using 2060", getFileInfoOpts); // counts for old bulk import: // Expected number of FileInfoOps was between 1000 and 2100 // new bulk import is way better :) assertEquals("unexpected number of FileInfoOps", 20, getFileInfoOpts); } } }