Java tutorial
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.accumulo.core.file.rfile; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.accumulo.core.cli.ConfigOpts; import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.crypto.CryptoServiceFactory; import org.apache.accumulo.core.crypto.CryptoServiceFactory.ClassloaderType; import org.apache.accumulo.core.crypto.CryptoUtils; import org.apache.accumulo.core.cryptoImpl.NoFileEncrypter; import org.apache.accumulo.core.data.ByteSequence; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Range; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.file.FileSKVIterator; import org.apache.accumulo.core.file.blockfile.impl.CachableBlockFile.CachableBuilder; import org.apache.accumulo.core.file.rfile.RFile.Reader; import org.apache.accumulo.core.file.rfile.bcfile.Utils; import org.apache.accumulo.core.summary.SummaryReader; import org.apache.accumulo.core.util.LocalityGroupUtil; import org.apache.accumulo.start.spi.KeywordExecutable; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.beust.jcommander.Parameter; import com.google.auto.service.AutoService; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; @AutoService(KeywordExecutable.class) public class PrintInfo implements KeywordExecutable { private static final Logger log = LoggerFactory.getLogger(PrintInfo.class); static class Opts extends ConfigOpts { @Parameter(names = { "-d", "--dump" }, description = "dump the key/value pairs") boolean dump = false; @Parameter(names = { "-v", "--vis" }, description = "show visibility metrics") boolean vis = false; @Parameter(names = { "--visHash" }, description = "show visibilities as hashes, implies -v") boolean hash = false; @Parameter(names = { "--histogram" }, description = "print a histogram of the key-value sizes") boolean histogram = false; @Parameter(names = { "--printIndex" }, description = "prints information about all the index entries") boolean printIndex = false; @Parameter(names = { "--useSample" }, description = "Use sample data for --dump, --vis, --histogram options") boolean useSample = false; @Parameter(names = { "--summary" }, description = "Print summary data in file") boolean printSummary = false; @Parameter(names = { "--keyStats" }, description = "print key length statistics for index and all data") boolean keyStats = false; @Parameter(description = " <file> { <file> ... }") List<String> files = new ArrayList<>(); @Parameter(names = { "-c", "--config" }, variableArity = true, description = "Comma-separated Hadoop configuration files") List<String> configFiles = new ArrayList<>(); } static class LogHistogram { long[] countBuckets = new long[11]; long[] sizeBuckets = new long[countBuckets.length]; long totalSize = 0; public void add(int size) { int bucket = (int) Math.log10(size); countBuckets[bucket]++; sizeBuckets[bucket] += size; totalSize += size; } public void print(String indent) { System.out.println(indent + "Up to size count %-age"); for (int i = 1; i < countBuckets.length; i++) { System.out.println(String.format("%s%11.0f : %10d %6.2f%%", indent, Math.pow(10, i), countBuckets[i], sizeBuckets[i] * 100. / totalSize)); } } } static class KeyStats { private SummaryStatistics stats = new SummaryStatistics(); private LogHistogram logHistogram = new LogHistogram(); public void add(Key k) { int size = k.getSize(); stats.addValue(size); logHistogram.add(size); } public void print(String indent) { logHistogram.print(indent); System.out.println(); System.out.printf("%smin:%,11.2f max:%,11.2f avg:%,11.2f stddev:%,11.2f\n", indent, stats.getMin(), stats.getMax(), stats.getMean(), stats.getStandardDeviation()); } } public static void main(String[] args) throws Exception { new PrintInfo().execute(args); } @Override public String keyword() { return "rfile-info"; } @Override public String description() { return "Prints rfile info"; } @SuppressFBWarnings(value = "DM_EXIT", justification = "System.exit is fine here because it's a utility class executed by a main()") @Override public void execute(final String[] args) throws Exception { Opts opts = new Opts(); opts.parseArgs("accumulo rfile-info", args); if (opts.files.isEmpty()) { System.err.println("No files were given"); System.exit(1); } SiteConfiguration siteConfig = opts.getSiteConfiguration(); Configuration conf = new Configuration(); for (String confFile : opts.configFiles) { log.debug("Adding Hadoop configuration file {}", confFile); conf.addResource(new Path(confFile)); } FileSystem hadoopFs = FileSystem.get(conf); FileSystem localFs = FileSystem.getLocal(conf); LogHistogram kvHistogram = new LogHistogram(); KeyStats dataKeyStats = new KeyStats(); KeyStats indexKeyStats = new KeyStats(); for (String arg : opts.files) { Path path = new Path(arg); FileSystem fs; if (arg.contains(":")) fs = path.getFileSystem(conf); else { log.warn("Attempting to find file across filesystems. Consider providing URI instead of path"); fs = hadoopFs.exists(path) ? hadoopFs : localFs; // fall back to local } System.out.println("Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory())); printCryptoParams(path, fs); CachableBuilder cb = new CachableBuilder().fsPath(fs, path).conf(conf) .cryptoService(CryptoServiceFactory.newInstance(siteConfig, ClassloaderType.JAVA)); Reader iter = new RFile.Reader(cb); MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer(); if (opts.vis || opts.hash) iter.registerMetrics(vmg); iter.printInfo(opts.printIndex); System.out.println(); org.apache.accumulo.core.file.rfile.bcfile.PrintInfo .main(new String[] { "-props", opts.getPropertiesPath(), arg }); Map<String, ArrayList<ByteSequence>> localityGroupCF = null; if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) { localityGroupCF = iter.getLocalityGroupCF(); FileSKVIterator dataIter; if (opts.useSample) { dataIter = iter.getSample(); if (dataIter == null) { System.out.println("ERROR : This rfile has no sample data"); return; } } else { dataIter = iter; } if (opts.keyStats) { FileSKVIterator indexIter = iter.getIndex(); while (indexIter.hasTop()) { indexKeyStats.add(indexIter.getTopKey()); indexIter.next(); } } for (String lgName : localityGroupCF.keySet()) { LocalityGroupUtil.seek(dataIter, new Range(), lgName, localityGroupCF); while (dataIter.hasTop()) { Key key = dataIter.getTopKey(); Value value = dataIter.getTopValue(); if (opts.dump) { System.out.println(key + " -> " + value); if (System.out.checkError()) return; } if (opts.histogram) { kvHistogram.add(key.getSize() + value.getSize()); } if (opts.keyStats) { dataKeyStats.add(key); } dataIter.next(); } } } if (opts.printSummary) { SummaryReader.print(iter, System.out); } iter.close(); if (opts.vis || opts.hash) { System.out.println(); vmg.printMetrics(opts.hash, "Visibility", System.out); } if (opts.histogram) { System.out.println(); kvHistogram.print(""); } if (opts.keyStats) { System.out.println(); System.out.println("Statistics for keys in data :"); dataKeyStats.print("\t"); System.out.println(); System.out.println("Statistics for keys in index :"); indexKeyStats.print("\t"); } // If the output stream has closed, there is no reason to keep going. if (System.out.checkError()) return; } } /** * Print the unencrypted parameters that tell the Crypto Service how to decrypt the file. This * information is useful for debugging if and how a file was encrypted. */ private void printCryptoParams(Path path, FileSystem fs) { byte[] noCryptoBytes = new NoFileEncrypter().getDecryptionParameters(); try (FSDataInputStream fsDis = fs.open(path)) { long fileLength = fs.getFileStatus(path).getLen(); fsDis.seek(fileLength - 16 - Utils.Version.size() - (Long.BYTES)); long cryptoParamOffset = fsDis.readLong(); fsDis.seek(cryptoParamOffset); byte[] cryptoParams = CryptoUtils.readParams(fsDis); if (!Arrays.equals(noCryptoBytes, cryptoParams)) { System.out.println("Encrypted with Params: " + Key.toPrintableString(cryptoParams, 0, cryptoParams.length, cryptoParams.length)); } else { System.out.println("No on disk encryption detected."); } } catch (IOException ioe) { log.error("Error reading crypto params", ioe); } } }