/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.test.continuous;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

import org.apache.accumulo.core.cli.MapReduceClientOnDefaultTable;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.accumulo.test.continuous.ContinuousWalk.BadChecksumException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.validators.PositiveInteger;
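// How verification works (explanatory note, derived from the code below): continuous ingest
// writes rows named by 16-digit hex numbers, and each value may embed a pointer to a
// previously written row. The mapper emits (row, DEF) for every row it reads, plus
// (prevRow, row) for every pointer it finds; the reducer groups by row and classifies each
// one. For example, if ingest wrote row 000000000000abcd pointing back at 0000000000001234,
// the mapper emits (0xabcd, DEF) and (0x1234, 0xabcd). A row such as 0x1234 that is
// referenced but has no DEF entry of its own indicates lost data and is counted as UNDEFINED.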
/**
 * A map reduce job that verifies a table created by continuous ingest. It verifies that all
 * referenced nodes are defined.
 */
public class ContinuousVerify extends Configured implements Tool {

  public static final VLongWritable DEF = new VLongWritable(-1);

  public static class CMapper extends Mapper<Key, Value, LongWritable, VLongWritable> {

    private static final Logger log = LoggerFactory.getLogger(CMapper.class);
    private LongWritable row = new LongWritable();
    private LongWritable ref = new LongWritable();
    private VLongWritable vrow = new VLongWritable();

    private long corrupt = 0;

    @Override
    public void map(Key key, Value data, Context context) throws IOException, InterruptedException {
      long r = Long.parseLong(key.getRow().toString(), 16);
      if (r < 0)
        throw new IllegalArgumentException();

      try {
        ContinuousWalk.validate(key, data);
      } catch (BadChecksumException bce) {
        context.getCounter(Counts.CORRUPT).increment(1L);
        if (corrupt < 1000) {
          log.error("Bad checksum : " + key);
        } else if (corrupt == 1000) {
          System.out.println("Too many bad checksums, not printing anymore!");
        }
        corrupt++;
        return;
      }

      row.set(r);
      context.write(row, DEF);

      byte[] val = data.get();
      int offset = ContinuousWalk.getPrevRowOffset(val);
      if (offset > 0) {
        ref.set(Long.parseLong(new String(val, offset, 16, UTF_8), 16));
        vrow.set(r);
        context.write(ref, vrow);
      }
    }
  }

  public static enum Counts {
    UNREFERENCED, UNDEFINED, REFERENCED, CORRUPT
  }

  public static class CReducer extends Reducer<LongWritable, VLongWritable, Text, Text> {
    private ArrayList<Long> refs = new ArrayList<>();

    @Override
    public void reduce(LongWritable key, Iterable<VLongWritable> values, Context context) throws IOException, InterruptedException {
      int defCount = 0;

      refs.clear();
      for (VLongWritable type : values) {
        if (type.get() == -1) {
          defCount++;
        } else {
          refs.add(type.get());
        }
      }

      if (defCount == 0 && refs.size() > 0) {
        StringBuilder sb = new StringBuilder();
        String comma = "";
        for (Long ref : refs) {
          sb.append(comma);
          comma = ",";
          sb.append(new String(ContinuousIngest.genRow(ref), UTF_8));
        }

        context.write(new Text(ContinuousIngest.genRow(key.get())), new Text(sb.toString()));
        context.getCounter(Counts.UNDEFINED).increment(1L);
      } else if (defCount > 0 && refs.size() == 0) {
        context.getCounter(Counts.UNREFERENCED).increment(1L);
      } else {
        context.getCounter(Counts.REFERENCED).increment(1L);
      }
    }
  }

  static class Opts extends MapReduceClientOnDefaultTable {
    @Parameter(names = "--output", description = "location in HDFS to store the results; must not exist")
    String outputDir = "/tmp/continuousVerify";

    @Parameter(names = "--maxMappers", description = "the maximum number of mappers to use", validateWith = PositiveInteger.class)
    int maxMaps = 1;

    @Parameter(names = "--reducers", description = "the number of reducers to use", validateWith = PositiveInteger.class)
    int reducers = 1;

    @Parameter(names = "--offline", description = "perform the verification directly on the files while the table is offline")
    boolean scanOffline = false;

    public Opts() {
      super("ci");
    }
  }

  @Override
  public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(this.getClass().getName(), args);

    Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(AccumuloInputFormat.class);
    opts.setAccumuloConfigs(job);

    Set<Range> ranges = null;
    String clone = opts.getTableName();
    Connector conn = null;

    if (opts.scanOffline) {
      Random random = new Random();
      clone = opts.getTableName() + "_" + String.format("%016x", (random.nextLong() & 0x7fffffffffffffffL));
      conn = opts.getConnector();
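      // Clone the table and take the clone offline so the mappers can read the table's
      // files directly from HDFS. The original table stays online and writable; the clone
      // is a cheap, consistent snapshot and is deleted once the job finishes.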
      conn.tableOperations().clone(opts.getTableName(), clone, true, new HashMap<String, String>(), new HashSet<String>());
      ranges = conn.tableOperations().splitRangeByTablets(opts.getTableName(), new Range(), opts.maxMaps);
      conn.tableOperations().offline(clone);
      AccumuloInputFormat.setInputTableName(job, clone);
      AccumuloInputFormat.setOfflineTableScan(job, true);
    } else {
      ranges = opts.getConnector().tableOperations().splitRangeByTablets(opts.getTableName(), new Range(), opts.maxMaps);
    }

    AccumuloInputFormat.setRanges(job, ranges);
    AccumuloInputFormat.setAutoAdjustRanges(job, false);

    job.setMapperClass(CMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(VLongWritable.class);

    job.setReducerClass(CReducer.class);
    job.setNumReduceTasks(opts.reducers);

    job.setOutputFormatClass(TextOutputFormat.class);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", opts.scanOffline);

    TextOutputFormat.setOutputPath(job, new Path(opts.outputDir));

    job.waitForCompletion(true);

    if (opts.scanOffline) {
      conn.tableOperations().delete(clone);
    }
    opts.stopTracing();
    return job.isSuccessful() ? 0 : 1;
  }

  /**
   * @param args
   *          instanceName zookeepers username password table columns outputpath
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(CachedConfiguration.getInstance(), new ContinuousVerify(), args);
    if (res != 0)
      System.exit(res);
  }
}
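// Example invocation (illustrative sketch only: the -i/-z/-u/-p connection flags are
// inherited from MapReduceClientOnDefaultTable and may differ by Accumulo version, and the
// instance, zookeeper, and credential values below are placeholders):
//
//   accumulo org.apache.accumulo.test.continuous.ContinuousVerify \
//       -i myInstance -z zoo1:2181 -u root -p secret \
//       --output /tmp/continuousVerify --maxMappers 4 --reducers 2 --offline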