Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cmd; import static org.apache.jena.tdbloader4.Constants.OUTPUT_PATH_POSTFIX_1; import static org.apache.jena.tdbloader4.Constants.OUTPUT_PATH_POSTFIX_2; import static org.apache.jena.tdbloader4.Constants.OUTPUT_PATH_POSTFIX_3; import static org.apache.jena.tdbloader4.Constants.OUTPUT_PATH_POSTFIX_4; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.jena.tdbloader4.Constants; import org.apache.jena.tdbloader4.FirstDriver; import org.apache.jena.tdbloader4.FourthDriver; import org.apache.jena.tdbloader4.SecondDriver; import org.apache.jena.tdbloader4.ThirdDriver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.graph.Graph; import com.hp.hpl.jena.graph.Node; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.sparql.core.DatasetGraph; import com.hp.hpl.jena.tdb.TDBFactory; import com.hp.hpl.jena.tdb.TDBLoader; import com.hp.hpl.jena.tdb.base.file.Location; import com.hp.hpl.jena.tdb.store.DatasetGraphTDB; import com.hp.hpl.jena.tdb.sys.SetupTDB; import com.hp.hpl.jena.tdb.transaction.DatasetGraphTransaction; import com.hp.hpl.jena.util.iterator.ExtendedIterator; public class tdbloader4 extends Configured implements Tool { private static final Logger log = LoggerFactory.getLogger(tdbloader4.class); public tdbloader4() { super(); log.debug("constructed with no configuration."); } public tdbloader4(Configuration configuration) { super(configuration); log.debug("constructed with configuration."); } @Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getName()); ToolRunner.printGenericCommandUsage(System.err); return -1; } Configuration configuration = getConf(); configuration.set(Constants.RUN_ID, String.valueOf(System.currentTimeMillis())); boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT, Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT); boolean copyToLocal = configuration.getBoolean(Constants.OPTION_COPY_TO_LOCAL, Constants.OPTION_COPY_TO_LOCAL_DEFAULT); boolean verify = configuration.getBoolean(Constants.OPTION_VERIFY, Constants.OPTION_VERIFY_DEFAULT); boolean runLocal = configuration.getBoolean(Constants.OPTION_RUN_LOCAL, Constants.OPTION_RUN_LOCAL_DEFAULT); FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration); if (overrideOutput) { fs.delete(new Path(args[1]), true); fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_1), true); fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_2), true); fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_3), true); fs.delete(new Path(args[1] + OUTPUT_PATH_POSTFIX_4), true); } if ((copyToLocal) || (runLocal)) { File path = new File(args[1]); path.mkdirs(); } Tool first = new FirstDriver(configuration); int status = first.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_1 }); if (status != 0) { return status; } createOffsetsFile(fs, args[1] + OUTPUT_PATH_POSTFIX_1, args[1] + OUTPUT_PATH_POSTFIX_1); Path offsets = new Path(args[1] + OUTPUT_PATH_POSTFIX_1, Constants.OFFSETS_FILENAME); DistributedCache.addCacheFile(offsets.toUri(), configuration); Tool second = new SecondDriver(configuration); status = second.run(new String[] { args[0], args[1] + OUTPUT_PATH_POSTFIX_2 }); if (status != 0) { return status; } Tool third = new ThirdDriver(configuration); status = third.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_3 }); if (status != 0) { return status; } Tool fourth = new FourthDriver(configuration); status = fourth.run(new String[] { args[1] + OUTPUT_PATH_POSTFIX_3, args[1] + OUTPUT_PATH_POSTFIX_4 }); if (status != 0) { return status; } if (copyToLocal) { Tool download = new download(configuration); download.run( new String[] { args[1] + OUTPUT_PATH_POSTFIX_2, args[1] + OUTPUT_PATH_POSTFIX_4, args[1] }); } if (verify) { DatasetGraphTDB dsgMem = load(args[0]); Location location = new Location(args[1]); if (!copyToLocal) { // TODO: this is a sort of a cheat and it could go away (if it turns out to be too slow)! download.fixNodeTable2(location); } DatasetGraphTDB dsgDisk = SetupTDB.buildDataset(location); boolean isomorphic = isomorphic(dsgMem, dsgDisk); System.out.println("> " + isomorphic); } return status; } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new tdbloader4(), args); System.exit(exitCode); } private void createOffsetsFile(FileSystem fs, String input, String output) throws IOException { log.debug("Creating offsets file..."); Map<Long, Long> offsets = new TreeMap<Long, Long>(); FileStatus[] status = fs.listStatus(new Path(input)); for (FileStatus fileStatus : status) { Path file = fileStatus.getPath(); if (file.getName().startsWith("part-r-")) { log.debug("Processing: {}", file.getName()); BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(file))); String line = in.readLine(); String[] tokens = line.split("\\s"); long partition = Long.valueOf(tokens[0]); long offset = Long.valueOf(tokens[1]); log.debug("Partition {} has offset {}", partition, offset); offsets.put(partition, offset); } } Path outputPath = new Path(output, Constants.OFFSETS_FILENAME); PrintWriter out = new PrintWriter(new OutputStreamWriter(fs.create(outputPath))); for (Long partition : offsets.keySet()) { out.println(partition + "\t" + offsets.get(partition)); } out.close(); log.debug("Offset file created."); } public static boolean isomorphic(DatasetGraph dsgMem, DatasetGraph dsgDisk) { if (!dsgMem.getDefaultGraph().isIsomorphicWith(dsgDisk.getDefaultGraph())) return false; Iterator<Node> graphsMem = dsgMem.listGraphNodes(); Iterator<Node> graphsDisk = dsgDisk.listGraphNodes(); Set<Node> seen = new HashSet<Node>(); while (graphsMem.hasNext()) { Node graphNode = graphsMem.next(); if (dsgDisk.getGraph(graphNode) == null) return false; if (!dsgMem.getGraph(graphNode).isIsomorphicWith(dsgDisk.getGraph(graphNode))) return false; seen.add(graphNode); } while (graphsDisk.hasNext()) { Node graphNode = graphsDisk.next(); if (!seen.contains(graphNode)) { if (dsgMem.getGraph(graphNode) == null) return false; if (!dsgMem.getGraph(graphNode).isIsomorphicWith(dsgDisk.getGraph(graphNode))) return false; } } return true; } public static DatasetGraphTDB load(String inputPath) { List<String> urls = new ArrayList<String>(); for (File file : new File(inputPath).listFiles()) { if (file.isFile()) { urls.add(file.getAbsolutePath()); } } DatasetGraphTransaction dsg = (DatasetGraphTransaction) TDBFactory.createDatasetGraph(); TDBLoader.load(dsg.getBaseDatasetGraph(), urls); return dsg.getBaseDatasetGraph(); } public static String dump(DatasetGraph dsgMem, DatasetGraph dsgDisk) { StringBuffer sb = new StringBuffer(); sb.append("\n"); if (!dsgMem.getDefaultGraph().isIsomorphicWith(dsgDisk.getDefaultGraph())) { sb.append("Default graphs are not isomorphic [FAIL]\n"); sb.append(" First:\n"); dump(sb, dsgMem.getDefaultGraph()); sb.append(" Second:\n"); dump(sb, dsgDisk.getDefaultGraph()); } else { sb.append("Default graphs are isomorphic [OK]\n"); } Iterator<Node> graphsMem = dsgMem.listGraphNodes(); Iterator<Node> graphsDisk = dsgDisk.listGraphNodes(); Set<Node> seen = new HashSet<Node>(); while (graphsMem.hasNext()) { Node graphNode = graphsMem.next(); if (dsgDisk.getGraph(graphNode) == null) sb.append(graphNode + " is present only in one dataset. [FAIL]\n"); if (!dsgMem.getGraph(graphNode).isIsomorphicWith(dsgDisk.getGraph(graphNode))) { sb.append("\n" + graphNode + " graphs are not isomorphic [FAIL]\n"); sb.append(" First:\n"); dump(sb, dsgMem.getGraph(graphNode)); sb.append(" Second:\n"); dump(sb, dsgDisk.getGraph(graphNode)); } seen.add(graphNode); } while (graphsDisk.hasNext()) { Node graphNode = graphsDisk.next(); if (!seen.contains(graphNode)) { if (dsgMem.getGraph(graphNode) == null) sb.append(graphNode + " is present only in one dataset. [FAIL]\n"); if (!dsgMem.getGraph(graphNode).isIsomorphicWith(dsgDisk.getGraph(graphNode))) { sb.append("\n" + graphNode + " graphs are not isomorphic [FAIL]\n"); sb.append(" First:\n"); dump(sb, dsgMem.getGraph(graphNode)); sb.append(" Second:\n"); dump(sb, dsgDisk.getGraph(graphNode)); } } } return sb.toString(); } private static void dump(StringBuffer sb, Graph graph) { ExtendedIterator<Triple> iter = graph.find(Node.ANY, Node.ANY, Node.ANY); while (iter.hasNext()) { Triple triple = iter.next(); sb.append(triple).append("\n"); } } }