Java examples for Big Data: Apache Spark
Reduce operation on Apache Spark
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.Function; import scala.Tuple2; import java.util.Arrays; import java.util.regex.Pattern; public class Main { private static final Pattern COMMA = Pattern.compile(","); public static void main(String[] args) throws Exception { String inputPath = args[0];/* ww w.ja v a 2 s.c om*/ String outputPath = args[1]; // Read in file JavaSparkContext sc = new JavaSparkContext(); JavaRDD<String> lines = sc.textFile(inputPath); // Get key (district) value (voter number) pair JavaRDD<String> district = lines.flatMap(line -> Arrays.asList(COMMA.split(line)[0]).iterator()); JavaRDD<Integer> count = lines.flatMap(line -> Arrays.asList(Integer.parseInt(COMMA.split(line)[3])).iterator()); JavaPairRDD<String, Integer> tempPair = district.zip(count); tempPair.collect(); // Reduce add up voters in the same district JavaPairRDD<String, Integer> districtVoter = tempPair.reduceByKey((x, y) -> x + y); JavaPairRDD<String, Integer> sorted = districtVoter.sortByKey(); sorted.coalesce(1).saveAsTextFile(outputPath); sc.stop(); } }