Usage examples for org.apache.commons.lang3.StringEscapeUtils.unescapeJava
public static final String unescapeJava(final String input)
Unescapes any Java literals found in the String. For example, it will turn a sequence of '\' and 'n' into a newline character, unless the '\' is preceded by another '\'.
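Before the full examples, a minimal, self-contained sketch of what the method does. The class name and the sample input are illustrative only; the behavior shown (turning literal escape sequences into the characters they denote) is the documented behavior of unescapeJava.

import org.apache.commons.lang3.StringEscapeUtils;

public class UnescapeJavaDemo {
  public static void main(String[] args) {
    // "\\t", "\\n" and "\\u0041" are Java-escaped sequences as they would
    // arrive from a command-line argument or a properties file.
    String escaped = "first\\tsecond\\nthird\\u0041";

    // unescapeJava turns the literal escapes into real characters:
    // a tab, a newline, and the unicode escape \u0041 ('A').
    String unescaped = StringEscapeUtils.unescapeJava(escaped);

    System.out.println(unescaped);
  }
}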
From source file:org.apache.flink.graph.drivers.Graph500.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  if (!parameters.has("directed")) {
    throw new ProgramParametrizationException(
        getUsage("must declare execution mode as '--directed true' or '--directed false'"));
  }
  boolean directed = parameters.getBoolean("directed");

  if (!parameters.has("simplify")) {
    throw new ProgramParametrizationException(
        getUsage("must declare '--simplify true' or '--simplify false'"));
  }
  boolean simplify = parameters.getBoolean("simplify");

  // Generate RMat graph
  int scale = parameters.getInt("scale", DEFAULT_SCALE);
  int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

  RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

  long vertexCount = 1L << scale;
  long edgeCount = vertexCount * edgeFactor;

  boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

  Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

  if (directed) {
    if (simplify) {
      graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());
    }
  } else {
    if (simplify) {
      graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip));
    } else {
      graph = graph.getUndirected();
    }
  }

  DataSet<Tuple2<LongValue, LongValue>> edges = graph.getEdges().project(0, 1);

  // Print, hash, or write RMat graph to disk
  switch (parameters.get("output", "")) {
    case "print":
      System.out.println();
      edges.print();
      break;

    case "hash":
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(edges));
      break;

    case "csv":
      String filename = parameters.getRequired("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      edges.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute("Graph500");
      break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
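All of the Flink drivers in this listing repeat the same small pattern: delimiters arrive on the command line as escaped text ("\\t", "\\n", ...) and are passed through unescapeJava before being handed to the CSV reader or writer. The sketch below isolates just that pattern, assuming Flink's ParameterTool and CsvOutputFormat are on the classpath; the class name DelimiterUnescapeSketch is illustrative.

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.flink.api.java.io.CsvOutputFormat;
import org.apache.flink.api.java.utils.ParameterTool;

public class DelimiterUnescapeSketch {
  public static void main(String[] args) {
    // e.g. run with: --output_field_delimiter "\\t"
    ParameterTool parameters = ParameterTool.fromArgs(args);

    // The raw argument is the two characters '\' and 't'; unescapeJava
    // converts it into a single real tab character.
    String lineDelimiter = StringEscapeUtils
        .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
    String fieldDelimiter = StringEscapeUtils
        .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

    System.out.println("line delimiter length:  " + lineDelimiter.length());
    System.out.println("field delimiter length: " + fieldDelimiter.length());
  }
}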
From source file:org.apache.flink.graph.drivers.GraphMetrics.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  if (!parameters.has("directed")) {
    throw new ProgramParametrizationException(
        getUsage("must declare execution mode as '--directed true' or '--directed false'"));
  }
  boolean directedAlgorithm = parameters.getBoolean("directed");

  GraphAnalytic vm;
  GraphAnalytic em;

  switch (parameters.get("input", "")) {
    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (directedAlgorithm) {
        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());

          vm = newGraph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<LongValue, NullValue, NullValue>());
          em = newGraph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<LongValue, NullValue, NullValue>());
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
              .run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>());

          vm = newGraph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<IntValue, NullValue, NullValue>());
          em = newGraph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<IntValue, NullValue, NullValue>());
        }
      } else {
        boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip));

          vm = newGraph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<LongValue, NullValue, NullValue>());
          em = newGraph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<LongValue, NullValue, NullValue>());
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
              .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip));

          vm = newGraph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<IntValue, NullValue, NullValue>());
          em = newGraph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<IntValue, NullValue, NullValue>());
        }
      }
    } break;

    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());
            }

            vm = graph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<LongValue, NullValue, NullValue>());
            em = graph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<LongValue, NullValue, NullValue>());
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false));
            }

            vm = graph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<LongValue, NullValue, NullValue>());
            em = graph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<LongValue, NullValue, NullValue>());
          }
        } break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>());
            }

            vm = graph.run(new org.apache.flink.graph.library.metric.directed.VertexMetrics<StringValue, NullValue, NullValue>());
            em = graph.run(new org.apache.flink.graph.library.metric.directed.EdgeMetrics<StringValue, NullValue, NullValue>());
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false));
            }

            vm = graph.run(new org.apache.flink.graph.library.metric.undirected.VertexMetrics<StringValue, NullValue, NullValue>());
            em = graph.run(new org.apache.flink.graph.library.metric.undirected.EdgeMetrics<StringValue, NullValue, NullValue>());
          }
        } break;

        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
    } break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  env.execute("Graph Metrics");

  System.out.println();
  System.out.print("Vertex metrics:\n ");
  System.out.println(vm.getResult().toString().replace(";", "\n "));
  System.out.println();
  System.out.print("Edge metrics:\n ");
  System.out.println(em.getResult().toString().replace(";", "\n "));

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.drivers.HITS.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  int iterations = parameters.getInt("iterations", DEFAULT_ITERATIONS);

  DataSet hits;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          hits = reader.keyType(LongValue.class)
              .run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
        } break;

        case "string": {
          hits = reader.keyType(StringValue.class)
              .run(new org.apache.flink.graph.library.link_analysis.HITS<StringValue, NullValue, NullValue>(iterations));
        } break;

        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (scale > 32) {
        hits = graph.run(new Simplify<LongValue, NullValue, NullValue>())
            .run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
      } else {
        hits = graph
            .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()))
            .run(new Simplify<IntValue, NullValue, NullValue>())
            .run(new org.apache.flink.graph.library.link_analysis.HITS<IntValue, NullValue, NullValue>(iterations));
      }
    } break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  switch (parameters.get("output", "")) {
    case "print":
      System.out.println();
      for (Object e : hits.collect()) {
        System.out.println(((Result) e).toPrintableString());
      }
      break;

    case "hash":
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(hits));
      break;

    case "csv":
      String filename = parameters.getRequired("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      hits.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute("HITS");
      break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.drivers.JaccardIndex.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);

  DataSet ji;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (parameters.getBoolean("simplify", false)) {
            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
          }

          ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        } break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (parameters.getBoolean("simplify", false)) {
            graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
          }

          ji = graph.run(new org.apache.flink.graph.library.similarity.JaccardIndex<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        } break;

        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount)
          .setParallelism(little_parallelism).generate();

      boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

      if (scale > 32) {
        ji = graph
            .run(new Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism))
            .run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
      } else {
        ji = graph
            .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism))
            .run(new Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism))
            .run(new org.apache.flink.graph.library.similarity.JaccardIndex<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
      }
    } break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  switch (parameters.get("output", "")) {
    case "print":
      System.out.println();
      for (Object e : ji.collect()) {
        Result result = (Result) e;
        System.out.println(result.toPrintableString());
      }
      break;

    case "hash":
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(ji));
      break;

    case "csv":
      String filename = parameters.getRequired("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      ji.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute("Jaccard Index");
      break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.drivers.TriangleListing.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  env.getConfig().setGlobalJobParameters(parameters);

  if (!parameters.has("directed")) {
    throw new ProgramParametrizationException(
        getUsage("must declare execution mode as '--directed true' or '--directed false'"));
  }
  boolean directedAlgorithm = parameters.getBoolean("directed");

  int little_parallelism = parameters.getInt("little_parallelism", PARALLELISM_DEFAULT);
  boolean triadic_census = parameters.getBoolean("triadic_census", DEFAULT_TRIADIC_CENSUS);

  GraphAnalytic tc = null;
  DataSet tl;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.getRequired("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));
            }

            if (triadic_census) {
              tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
            }
            tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
            }

            if (triadic_census) {
              tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
            }
            tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
        } break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (directedAlgorithm) {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<StringValue, NullValue, NullValue>().setParallelism(little_parallelism));
            }

            if (triadic_census) {
              tc = graph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
            }
            tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          } else {
            if (parameters.getBoolean("simplify", false)) {
              graph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<StringValue, NullValue, NullValue>(false).setParallelism(little_parallelism));
            }

            if (triadic_census) {
              tc = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
            }
            tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
        } break;

        default:
          throw new ProgramParametrizationException(getUsage("invalid CSV type"));
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (directedAlgorithm) {
        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));

          if (triadic_census) {
            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
          tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        } else {
          Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>().setParallelism(little_parallelism));

          if (triadic_census) {
            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
          tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        }
      } else {
        boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> simpleGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));

          if (triadic_census) {
            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
          tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        } else {
          Graph<IntValue, NullValue, NullValue> simpleGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToUnsignedIntValue()).setParallelism(little_parallelism))
              .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip).setParallelism(little_parallelism));

          if (triadic_census) {
            tc = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriadicCensus<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
          }
          tl = simpleGraph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>().setLittleParallelism(little_parallelism));
        }
      }
    } break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid input type"));
  }

  switch (parameters.get("output", "")) {
    case "print":
      System.out.println();
      if (directedAlgorithm) {
        for (Object e : tl.collect()) {
          org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result =
              (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
          System.out.println(result.toPrintableString());
        }
      } else {
        tl.print();
      }
      break;

    case "hash":
      System.out.println();
      System.out.println(DataSetUtils.checksumHashCode(tl));
      break;

    case "csv":
      String filename = parameters.getRequired("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute();
      break;

    default:
      throw new ProgramParametrizationException(getUsage("invalid output type"));
  }

  if (tc != null) {
    System.out.print("Triadic census:\n ");
    System.out.println(tc.getResult().toString().replace(";", "\n "));
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.examples.ClusteringCoefficient.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  if (!parameters.has("directed")) {
    printUsage();
    return;
  }
  boolean directedAlgorithm = parameters.getBoolean("directed");

  // global and local clustering coefficient results
  GraphAnalytic gcc;
  DataSet lcc;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.get("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (directedAlgorithm) {
            gcc = graph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>());
            lcc = graph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>());
          } else {
            gcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>());
            lcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>());
          }
        } break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (directedAlgorithm) {
            gcc = graph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>());
            lcc = graph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<StringValue, NullValue, NullValue>());
          } else {
            gcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<StringValue, NullValue, NullValue>());
            lcc = graph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<StringValue, NullValue, NullValue>());
          }
        } break;

        default:
          printUsage();
          return;
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (directedAlgorithm) {
        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>());

          gcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>());
          lcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false));
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
              .run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>());

          gcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>());
          lcc = newGraph.run(new org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient<IntValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false));
        }
      } else {
        boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

        if (scale > 32) {
          Graph<LongValue, NullValue, NullValue> newGraph = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip));

          gcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<LongValue, NullValue, NullValue>());
          lcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<LongValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false));
        } else {
          Graph<IntValue, NullValue, NullValue> newGraph = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
              .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip));

          gcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.GlobalClusteringCoefficient<IntValue, NullValue, NullValue>());
          lcc = newGraph.run(new org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient<IntValue, NullValue, NullValue>().setIncludeZeroDegreeVertices(false));
        }
      }
    } break;

    default:
      printUsage();
      return;
  }

  switch (parameters.get("output", "")) {
    case "print":
      if (directedAlgorithm) {
        for (Object e : lcc.collect()) {
          org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result result =
              (org.apache.flink.graph.library.clustering.directed.LocalClusteringCoefficient.Result) e;
          System.out.println(result.toVerboseString());
        }
      } else {
        for (Object e : lcc.collect()) {
          org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result result =
              (org.apache.flink.graph.library.clustering.undirected.LocalClusteringCoefficient.Result) e;
          System.out.println(result.toVerboseString());
        }
      }
      System.out.println(gcc.getResult());
      break;

    case "hash":
      System.out.println(DataSetUtils.checksumHashCode(lcc));
      System.out.println(gcc.getResult());
      break;

    case "csv":
      String filename = parameters.get("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      lcc.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      System.out.println(gcc.execute());
      break;

    default:
      printUsage();
      return;
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.examples.HITS.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);

  int iterations = parameters.getInt("iterations", DEFAULT_ITERATIONS);

  DataSet hits;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.get("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          hits = reader.keyType(LongValue.class)
              .run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
        } break;

        case "string": {
          hits = reader.keyType(StringValue.class)
              .run(new org.apache.flink.graph.library.link_analysis.HITS<StringValue, NullValue, NullValue>(iterations));
        } break;

        default:
          printUsage();
          return;
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (scale > 32) {
        hits = graph.run(new Simplify<LongValue, NullValue, NullValue>())
            .run(new org.apache.flink.graph.library.link_analysis.HITS<LongValue, NullValue, NullValue>(iterations));
      } else {
        hits = graph
            .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
            .run(new Simplify<IntValue, NullValue, NullValue>())
            .run(new org.apache.flink.graph.library.link_analysis.HITS<IntValue, NullValue, NullValue>(iterations));
      }
    } break;

    default:
      printUsage();
      return;
  }

  switch (parameters.get("output", "")) {
    case "print":
      for (Object e : hits.collect()) {
        System.out.println(((Result) e).toVerboseString());
      }
      break;

    case "hash":
      System.out.println(DataSetUtils.checksumHashCode(hits));
      break;

    case "csv":
      String filename = parameters.get("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      hits.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute();
      break;

    default:
      printUsage();
      return;
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.examples.JaccardIndex.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);

  DataSet ji;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.get("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          ji = reader.keyType(LongValue.class)
              .run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>());
        } break;

        case "string": {
          ji = reader.keyType(StringValue.class)
              .run(new org.apache.flink.graph.library.similarity.JaccardIndex<StringValue, NullValue, NullValue>());
        } break;

        default:
          printUsage();
          return;
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (scale > 32) {
        ji = graph.run(new Simplify<LongValue, NullValue, NullValue>(clipAndFlip))
            .run(new org.apache.flink.graph.library.similarity.JaccardIndex<LongValue, NullValue, NullValue>());
      } else {
        ji = graph
            .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
            .run(new Simplify<IntValue, NullValue, NullValue>(clipAndFlip))
            .run(new org.apache.flink.graph.library.similarity.JaccardIndex<IntValue, NullValue, NullValue>());
      }
    } break;

    default:
      printUsage();
      return;
  }

  switch (parameters.get("output", "")) {
    case "print":
      for (Object e : ji.collect()) {
        Result result = (Result) e;
        System.out.println(result.toVerboseString());
      }
      break;

    case "hash":
      System.out.println(DataSetUtils.checksumHashCode(ji));
      break;

    case "csv":
      String filename = parameters.get("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      ji.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute();
      break;

    default:
      printUsage();
      return;
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.flink.graph.examples.TriangleListing.java
public static void main(String[] args) throws Exception {
  // Set up the execution environment
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  ParameterTool parameters = ParameterTool.fromArgs(args);
  if (!parameters.has("directed")) {
    printUsage();
    return;
  }
  boolean directedAlgorithm = parameters.getBoolean("directed");

  DataSet tl;

  switch (parameters.get("input", "")) {
    case "csv": {
      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("input_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      GraphCsvReader reader = Graph.fromCsvReader(parameters.get("input_filename"), env)
          .ignoreCommentsEdges("#").lineDelimiterEdges(lineDelimiter).fieldDelimiterEdges(fieldDelimiter);

      switch (parameters.get("type", "")) {
        case "integer": {
          Graph<LongValue, NullValue, NullValue> graph = reader.keyType(LongValue.class);

          if (directedAlgorithm) {
            tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>());
          } else {
            tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>());
          }
        } break;

        case "string": {
          Graph<StringValue, NullValue, NullValue> graph = reader.keyType(StringValue.class);

          if (directedAlgorithm) {
            tl = graph.run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<StringValue, NullValue, NullValue>());
          } else {
            tl = graph.run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<StringValue, NullValue, NullValue>());
          }
        } break;

        default:
          printUsage();
          return;
      }
    } break;

    case "rmat": {
      int scale = parameters.getInt("scale", DEFAULT_SCALE);
      int edgeFactor = parameters.getInt("edge_factor", DEFAULT_EDGE_FACTOR);

      RandomGenerableFactory<JDKRandomGenerator> rnd = new JDKRandomGeneratorFactory();

      long vertexCount = 1L << scale;
      long edgeCount = vertexCount * edgeFactor;

      Graph<LongValue, NullValue, NullValue> graph = new RMatGraph<>(env, rnd, vertexCount, edgeCount).generate();

      if (directedAlgorithm) {
        if (scale > 32) {
          tl = graph.run(new org.apache.flink.graph.asm.simple.directed.Simplify<LongValue, NullValue, NullValue>())
              .run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<LongValue, NullValue, NullValue>());
        } else {
          tl = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
              .run(new org.apache.flink.graph.asm.simple.directed.Simplify<IntValue, NullValue, NullValue>())
              .run(new org.apache.flink.graph.library.clustering.directed.TriangleListing<IntValue, NullValue, NullValue>());
        }
      } else {
        boolean clipAndFlip = parameters.getBoolean("clip_and_flip", DEFAULT_CLIP_AND_FLIP);

        graph = graph.run(new Simplify<LongValue, NullValue, NullValue>(clipAndFlip));

        if (scale > 32) {
          tl = graph.run(new org.apache.flink.graph.asm.simple.undirected.Simplify<LongValue, NullValue, NullValue>(clipAndFlip))
              .run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<LongValue, NullValue, NullValue>());
        } else {
          tl = graph
              .run(new TranslateGraphIds<LongValue, IntValue, NullValue, NullValue>(new LongValueToIntValue()))
              .run(new org.apache.flink.graph.asm.simple.undirected.Simplify<IntValue, NullValue, NullValue>(clipAndFlip))
              .run(new org.apache.flink.graph.library.clustering.undirected.TriangleListing<IntValue, NullValue, NullValue>());
        }
      }
    } break;

    default:
      printUsage();
      return;
  }

  switch (parameters.get("output", "")) {
    case "print":
      if (directedAlgorithm) {
        for (Object e : tl.collect()) {
          org.apache.flink.graph.library.clustering.directed.TriangleListing.Result result =
              (org.apache.flink.graph.library.clustering.directed.TriangleListing.Result) e;
          System.out.println(result.toVerboseString());
        }
      } else {
        tl.print();
      }
      break;

    case "hash":
      System.out.println(DataSetUtils.checksumHashCode(tl));
      break;

    case "csv":
      String filename = parameters.get("output_filename");

      String lineDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_line_delimiter", CsvOutputFormat.DEFAULT_LINE_DELIMITER));
      String fieldDelimiter = StringEscapeUtils
          .unescapeJava(parameters.get("output_field_delimiter", CsvOutputFormat.DEFAULT_FIELD_DELIMITER));

      tl.writeAsCsv(filename, lineDelimiter, fieldDelimiter);

      env.execute();
      break;

    default:
      printUsage();
      return;
  }

  JobExecutionResult result = env.getLastJobExecutionResult();

  NumberFormat nf = NumberFormat.getInstance();
  System.out.println("Execution runtime: " + nf.format(result.getNetRuntime()) + " ms");
}
From source file:org.apache.hadoop.hive.druid.HiveDruidQueryBasedInputFormat.java
@SuppressWarnings("deprecation")
private HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
  String address = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
  if (StringUtils.isEmpty(address)) {
    throw new IOException("Druid broker address not specified in configuration");
  }

  String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
  String druidQueryType;
  if (StringUtils.isEmpty(druidQuery)) {
    // Empty, maybe because CBO did not run; we fall back to
    // full Select query
    if (LOG.isWarnEnabled()) {
      LOG.warn("Druid query is empty; creating Select query");
    }
    String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
    if (dataSource == null) {
      throw new IOException("Druid data source cannot be empty");
    }
    druidQuery = createSelectStarQuery(address, dataSource);
    druidQueryType = Query.SELECT;
  } else {
    druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
      throw new IOException("Druid query type not recognized");
    }
  }

  // hive depends on FileSplits
  Job job = new Job(conf);
  JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
  Path[] paths = FileInputFormat.getInputPaths(jobContext);

  switch (druidQueryType) {
    case Query.TIMESERIES:
    case Query.TOPN:
    case Query.GROUP_BY:
      return new HiveDruidSplit[] { new HiveDruidSplit(address, druidQuery, paths[0]) };
    case Query.SELECT:
      return splitSelectQuery(conf, address, druidQuery, paths[0]);
    default:
      throw new IOException("Druid query type not recognized");
  }
}
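In the Hive/Druid example the JSON query is stored in the job Configuration in Java-escaped form and unescaped when the input format reads it back. The round-trip sketch below illustrates that idea only; the property key "example.druid.query.json" and the class name DruidQueryConfigSketch are illustrative and not the actual Hive constants.

import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;

public class DruidQueryConfigSketch {
  public static void main(String[] args) {
    String druidQuery = "{\"queryType\":\"timeseries\",\"dataSource\":\"wiki\"}";

    // Write path: escape the JSON before storing it in the Configuration.
    Configuration conf = new Configuration();
    conf.set("example.druid.query.json", StringEscapeUtils.escapeJava(druidQuery));

    // Read path, mirroring the getInputSplits() call above: unescape on retrieval.
    String restored = StringEscapeUtils.unescapeJava(conf.get("example.druid.query.json"));

    System.out.println(druidQuery.equals(restored)); // true
  }
}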