List of usage examples for com.google.common.collect.Iterables.size
public static int size(Iterable<?> iterable)
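Iterables.size returns the number of elements in the given Iterable: when the argument is actually a Collection it delegates to Collection.size(), otherwise it walks the iterator and counts. The PageRank examples below rely on it to count a URL's outgoing links, because groupByKey exposes each group only as an Iterable, not a Collection. A minimal self-contained sketch (class and variable names here are illustrative only, not taken from the examples below):

import com.google.common.collect.Iterables;
import java.util.Arrays;
import java.util.List;

public class IterablesSizeDemo {
    public static void main(String[] args) {
        // For a Collection, Iterables.size delegates to Collection.size() in O(1).
        List<String> neighbors = Arrays.asList("b", "c", "d");
        System.out.println(Iterables.size(neighbors)); // 3

        // For a plain Iterable, it walks the iterator and counts the elements.
        Iterable<String> lazy = neighbors::iterator;
        System.out.println(Iterables.size(lazy)); // 3
    }
}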
From source file:org.neo4j.nlp.impl.util.PageRankSpark.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("Graphify");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1());
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1()) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results;
        });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file:org.apache.spark.examples.PageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: PageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    final SparkConf sparkConf = new SparkConf().setAppName("PageRank");
    final JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    final JavaRDD<String> lines = ctx.textFile(args[0], 1);
    final int ITERATIONS = Integer.parseInt(args[1]);

    // Loads all URLs from the input file and initializes their neighbor lists.
    final JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < ITERATIONS; current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1);
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results;
        });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey((a, b) -> a + b).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dumps them to the console.
    final List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file:gtl.spark.java.example.apache.JavaPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkSession spark = SparkSession.builder().appName("JavaPageRank").getOrCreate();

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1());
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1()) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results.iterator();
        });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}
From source file:com.spark.cis833.extra.SparkPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: SparkPageRank <input> <output>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("SparkPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm (10 iterations).
    for (int current = 0; current < 10; current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                }).sortByKey(false);

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        }).sortByKey(false);
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    // Swaps (URL, rank) to (rank, URL), sorts by rank descending, and saves to the output path.
    JavaPairRDD<Double, String> swap1 = ranks.mapToPair(new PairFunction<Tuple2<String, Double>, Double, String>() {
        @Override
        public Tuple2<Double, String> call(Tuple2<String, Double> item) throws Exception {
            return item.swap();
        }
    }).sortByKey(false);
    swap1.saveAsTextFile(args[1]);

    ctx.stop();
}
From source file:com.jyz.study.hadoop.spark.examples.JavaPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file:JavaPageRank.java
public static void main(String[] args) throws Exception {
    Integer iterationNum = 5;
    String path = JavaWordCountDF.class.getClassLoader().getResource("pagerank_data.txt").getPath();

    showWarning();

    SparkSession spark = SparkSession.builder().master("local").appName("JavaPageRank").getOrCreate();

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(path).javaRDD();

    // Loads all URLs from the input file and initializes their neighbor lists.
    // Lambda type: (PairFunction<String, String, String>)
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(s -> {
        String[] parts = SPACES.split(s);
        return new Tuple2<>(parts[0], parts[1]);
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    // Lambda type: (Function<Iterable<String>, Double>)
    JavaPairRDD<String, Double> ranks = links.mapValues(rs -> 1.0);

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < iterationNum; current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        // Lambda type: (PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>)
        JavaPairRDD<String, Double> contribs = links.join(ranks).values().flatMapToPair(s -> {
            int urlCount = Iterables.size(s._1);
            List<Tuple2<String, Double>> results = new ArrayList<>();
            for (String n : s._1) {
                results.add(new Tuple2<>(n, s._2() / urlCount));
            }
            return results.iterator();
        });

        // Re-calculates URL ranks based on neighbor contributions.
        // Lambda type of sum: (Function<Double, Double>)
        ranks = contribs.reduceByKey(new Sum()).mapValues(sum -> 0.15 + sum * 0.85);
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}
From source file:com.andado.spark.examples.JavaPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkSession spark = SparkSession.builder().appName("JavaPageRank").getOrCreate();

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterator<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<>();
                        for (String n : s._1) {
                            results.add(new Tuple2<>(n, s._2() / urlCount));
                        }
                        return results.iterator();
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    spark.stop();
}
From source file:com.hxr.bigdata.spark.example141.JavaPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file:com.sdw.dream.spark.examples.JavaPageRank.java
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
        System.exit(1);
    }

    showWarning();

    SparkConf sparkConf = new SparkConf().setAppName("JavaPageRank");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);

    // Loads the input file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<String, Iterable<String>> links = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<String, String>(parts[0], parts[1]);
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
        @Override
        public Double call(Iterable<String> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[1]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<String, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
                    @Override
                    public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<String, Double>> results = new ArrayList<Tuple2<String, Double>>();
                        for (String n : s._1) {
                            results.add(new Tuple2<String, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions.
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return 0.15 + sum * 0.85;
            }
        });
    }

    // Collects all URL ranks and dumps them to the console.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + " has rank: " + tuple._2() + ".");
    }

    ctx.stop();
}
From source file:org.biocaddie.citationanalysis.metrics.JavaPageRankInt.java
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        System.err.println("Usage: JavaPageRankInt <links_file> <id_file> <number_of_iterations>");
        System.exit(1);
    }

    double alpha = 0.5;

    JavaSparkContext ctx = SparkUtils.getJavaSparkContext("JavaPageRank");

    // Loads the links file. It should be in the format:
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     URL         neighbor URL
    //     ...
    JavaRDD<String> lines = ctx.textFile(args[0]);

    // Loads all URLs from the input file and initializes their neighbor lists.
    JavaPairRDD<Integer, Iterable<Integer>> links = lines.mapToPair(new PairFunction<String, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(String s) {
            String[] parts = SPACES.split(s);
            return new Tuple2<Integer, Integer>(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
        }
    }).distinct().groupByKey().cache();

    // Initializes the rank of every URL to one.
    JavaPairRDD<Integer, Double> ranks = links.mapValues(new Function<Iterable<Integer>, Double>() {
        @Override
        public Double call(Iterable<Integer> rs) {
            return 1.0;
        }
    });

    // Iteratively calculates and updates URL ranks using the PageRank algorithm.
    for (int current = 0; current < Integer.parseInt(args[2]); current++) {
        // Calculates each URL's contribution to the ranks of its neighbors.
        JavaPairRDD<Integer, Double> contribs = links.join(ranks).values()
                .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<Integer>, Double>, Integer, Double>() {
                    @Override
                    public Iterable<Tuple2<Integer, Double>> call(Tuple2<Iterable<Integer>, Double> s) {
                        int urlCount = Iterables.size(s._1);
                        List<Tuple2<Integer, Double>> results = new ArrayList<Tuple2<Integer, Double>>();
                        for (Integer n : s._1) {
                            results.add(new Tuple2<Integer, Double>(n, s._2() / urlCount));
                        }
                        return results;
                    }
                });

        // Re-calculates URL ranks based on neighbor contributions, with damping
        // factor alpha (the standard PageRank constants are 0.15 and 0.85).
        ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
            @Override
            public Double call(Double sum) {
                return alpha + sum * (1 - alpha);
            }
        });
    }

    // Loads the id mapping file (one "nodeId,pmId" pair per line).
    JavaRDD<String> idLines = ctx.textFile(args[1]);
    JavaPairRDD<Integer, Integer> pmIds = idLines.mapToPair(new PairFunction<String, Integer, Integer>() {
        @Override
        public Tuple2<Integer, Integer> call(String s) {
            String[] parts = s.split(",");
            return new Tuple2<Integer, Integer>(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
        }
    });

    // Keeps only URLs with rank above 0.8, joins them with their mapped ids, and dumps the result to the console.
    ranks = ranks.filter(t -> t._2 > 0.8);
    JavaPairRDD<Integer, Tuple2<Double, Integer>> join = ranks.join(pmIds);
    List<Tuple2<Integer, Tuple2<Double, Integer>>> collect = join.collect();
    for (Tuple2<Integer, Tuple2<Double, Integer>> t : collect) {
        System.out.println(t._1 + "," + t._2._2 + "," + t._2._1);
    }

    ctx.stop();
}