Java tutorial: Verify.java from the org.apache.gora.goraci package. Verify is a MapReduce job that checks the linked lists written by Generator for holes: the mapper emits a definition marker for each node it reads plus a back-reference to the node's predecessor, and the reducer classifies every node as REFERENCED, UNREFERENCED, or UNDEFINED. The full source follows.
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gora.goraci;

import org.apache.gora.goraci.generated.CINode;
import org.apache.gora.goraci.generated.Flushed;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.util.Utf8;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.gora.util.GoraException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * A MapReduce job that verifies that the linked lists generated by
 * {@link org.apache.gora.goraci.Generator} do not have any holes.
 */
public class Verify extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(Verify.class);
  private static final VLongWritable DEF = new VLongWritable(-1);

  private Job job;

  public static class VerifyMapper extends GoraMapper<Long, CINode, LongWritable, VLongWritable> {
    private LongWritable row = new LongWritable();
    private LongWritable ref = new LongWritable();
    private VLongWritable vrow = new VLongWritable();

    private Map<Utf8, Long> flushed = null;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      String[] entries = context.getConfiguration().getStrings("org.apache.gora.goraci.verify.flushed");
      if (entries != null && entries.length > 0) {
        flushed = new HashMap<Utf8, Long>();
        for (String entry : entries) {
          String[] kv = entry.split(":");
          flushed.put(new Utf8(kv[0]), Long.parseLong(kv[1]));
        }
      }
    }

    @Override
    protected void map(Long key, CINode node, Context context) throws IOException, InterruptedException {
      if (flushed != null) {
        // In concurrent mode, skip nodes at or beyond each client's flushed
        // watermark: the generator may not have made them durable yet.
        Long count = flushed.get(node.getClient());
        if (count == null || node.getCount() >= count) {
          context.getCounter(Counts.IGNORED).increment(1);
          return;
        }
      }

      row.set(key);
      context.write(row, DEF);

      if (node.getPrev() >= 0) {
        ref.set(node.getPrev());
        vrow.set(key);
        context.write(ref, vrow);
      }
    }
  }

  public static enum Counts {
    UNREFERENCED, UNDEFINED, REFERENCED, CORRUPT, IGNORED
  }
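  /**
   * Reduces all values emitted for one node key: a -1 marks the node as defined,
   * and any other value is the key of a node whose prev pointer references it.
   * The combination determines whether the node is counted as REFERENCED,
   * UNREFERENCED, or UNDEFINED.
   */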
  public static class VerifyReducer extends Reducer<LongWritable, VLongWritable, Text, Text> {
    private ArrayList<Long> refs = new ArrayList<Long>();

    @Override
    public void reduce(LongWritable key, Iterable<VLongWritable> values, Context context)
        throws IOException, InterruptedException {
      int defCount = 0;

      refs.clear();
      for (VLongWritable type : values) {
        if (type.get() == -1) {
          defCount++;
        } else {
          refs.add(type.get());
        }
      }

      // TODO check for more than one def, should not happen
      if (defCount == 0 && refs.size() > 0) {
        // This is bad: found a node that is referenced but not defined. It must have
        // been lost, so emit some info about this node for debugging purposes.
        StringBuilder sb = new StringBuilder();
        String comma = "";
        for (Long ref : refs) {
          sb.append(comma);
          comma = ",";
          sb.append(String.format("%016x", ref));
        }
        context.write(new Text(String.format("%016x", key.get())), new Text(sb.toString()));
        context.getCounter(Counts.UNDEFINED).increment(1);
      } else if (defCount > 0 && refs.size() == 0) {
        // node is defined but not referenced
        context.getCounter(Counts.UNREFERENCED).increment(1);
      } else {
        // node is defined and referenced
        context.getCounter(Counts.REFERENCED).increment(1);
      }
    }
  }

  @Override
  public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("c", "concurrent", false, "run concurrently with generation");

    GnuParser parser = new GnuParser();
    CommandLine cmd = null;
    try {
      cmd = parser.parse(options, args);
      if (cmd.getArgs().length != 2) {
        throw new ParseException("Did not see expected # of arguments, saw " + cmd.getArgs().length);
      }
    } catch (ParseException e) {
      System.err.println("Failed to parse command line " + e.getMessage());
      System.err.println();
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(getClass().getSimpleName() + " <output dir> <num reducers>", options);
      System.exit(-1);
    }

    String outputDir = cmd.getArgs()[0];
    int numReducers = Integer.parseInt(cmd.getArgs()[1]);

    return run(outputDir, numReducers, cmd.hasOption("c"));
  }

  public int run(String outputDir, int numReducers, boolean concurrent) throws Exception {
    return run(new Path(outputDir), numReducers, concurrent);
  }

  public int run(Path outputDir, int numReducers, boolean concurrent) throws Exception {
    start(outputDir, numReducers, concurrent);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
  }

  public void start(Path outputDir, int numReducers, boolean concurrent) throws GoraException, IOException, Exception {
    LOG.info("Running Verify with outputDir=" + outputDir + ", numReducers=" + numReducers);

    DataStore<Long, CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class, new Configuration());

    job = new Job(getConf());

    if (!job.getConfiguration().get("io.serializations")
        .contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
      job.getConfiguration().set("io.serializations",
          job.getConfiguration().get("io.serializations") + ",org.apache.hadoop.io.serializer.JavaSerialization");
    }

    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());

    Query<Long, CINode> query = store.newQuery();
    if (!concurrent) {
      // no concurrency filtering, only need prev field
      query.setFields("prev");
    } else {
      readFlushed(job.getConfiguration());
    }

    GoraMapper.initMapperJob(job, query, store, LongWritable.class, VLongWritable.class, VerifyMapper.class, true);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    store.close();

    job.submit();
  }

  public boolean isComplete() throws IOException {
    return job.isComplete();
  }

  public boolean isSuccessful() throws IOException {
    return job.isSuccessful();
  }

  public boolean waitForCompletion() throws IOException, InterruptedException, ClassNotFoundException {
    return job.waitForCompletion(true);
  }

  private void readFlushed(Configuration conf) throws Exception {
    DataStore<Utf8, Flushed> flushedTable = DataStoreFactory.getDataStore(Utf8.class, Flushed.class, conf);

    Query<Utf8, Flushed> query = flushedTable.newQuery();
    Result<Utf8, Flushed> result = flushedTable.execute(query);

    ArrayList<String> flushedEntries = new ArrayList<String>();
    while (result.next()) {
      flushedEntries.add(result.getKey() + ":" + result.get().getCount());
    }

    conf.setStrings("org.apache.gora.goraci.verify.flushed", flushedEntries.toArray(new String[] {}));

    flushedTable.close();
  }
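  /**
   * Cross-checks the finished job's counters: REFERENCED must equal the expected
   * number of generated nodes, and UNREFERENCED and UNDEFINED must both be zero;
   * logs an error and returns false otherwise.
   */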
  public boolean verify(long expectedReferenced) throws Exception {
    if (job == null) {
      throw new IllegalStateException("You should call run() first");
    }

    Counters counters = job.getCounters();

    Counter referenced = counters.findCounter(Counts.REFERENCED);
    Counter unreferenced = counters.findCounter(Counts.UNREFERENCED);
    Counter undefined = counters.findCounter(Counts.UNDEFINED);

    boolean success = true;

    if (expectedReferenced != referenced.getValue()) {
      LOG.error("Expected referenced count does not match actual referenced count: expected referenced="
          + expectedReferenced + ", actual=" + referenced.getValue());
      success = false;
    }

    if (unreferenced.getValue() > 0) {
      LOG.error("Unreferenced nodes were not expected. Unreferenced count=" + unreferenced.getValue());
      success = false;
    }

    if (undefined.getValue() > 0) {
      LOG.error("Found an undefined node. Undefined count=" + undefined.getValue());
      success = false;
    }

    return success;
  }

  public static void main(String[] args) throws Exception {
    int ret = ToolRunner.run(new Verify(), args);
    System.exit(ret);
  }
}
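Besides the blocking ToolRunner entry point in main, Verify exposes a non-blocking API: start(Path, int, boolean) submits the job, isComplete() and isSuccessful() poll it, and verify(long) cross-checks the counters once the job has finished. The sketch below shows one way to drive that API. It is illustrative only: the VerifyDriverSketch class name, the output path, the reducer count, and the expected node count are all made-up values, and a Gora data store is assumed to be configured on the classpath.

package org.apache.gora.goraci;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

public class VerifyDriverSketch {
  public static void main(String[] args) throws Exception {
    Verify verify = new Verify();
    verify.setConf(new Configuration());

    // start() submits the job and returns immediately; the path and
    // reducer count here are placeholder values.
    verify.start(new Path("/tmp/goraci-verify"), 2, false);

    while (!verify.isComplete()) {
      Thread.sleep(1000L); // poll the submitted MapReduce job
    }

    // 1000000L stands in for however many nodes the Generator was asked to write.
    boolean ok = verify.isSuccessful() && verify.verify(1000000L);
    System.exit(ok ? 0 : 1);
  }
}

Passing true as the third argument to start corresponds to the -c/--concurrent flag: instead of restricting the query to the prev field, start calls readFlushed to load the Flushed table into the job configuration so the mapper can ignore nodes the generator has not yet flushed.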