Java example for Big Data: Apache Spark
Read and parse a "(district,voterCount)" text file with Apache Spark, then compute the mean voter count and save it.
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.function.Function; import scala.Tuple2; import java.util.Arrays; import java.util.regex.Pattern; public class Main { private static final Pattern COMMA = Pattern.compile(","); public static void main(String[] args) throws Exception { String inputPath = args[0];// w ww . java2 s. c om String outputPath = args[1]; // Read in file and parse data JavaSparkContext sc = new JavaSparkContext(); String regex = "[()]"; JavaRDD<Integer> districtVoter = sc.textFile(inputPath) .map(line -> Integer.parseInt(COMMA.split(line.replaceAll(regex, ""))[1])); int totalVoter = districtVoter.reduce((x, y) -> x + y); long count = districtVoter.count(); Double votermean = ((double) totalVoter) / count; JavaRDD<Double> mean = sc.parallelize(Arrays.asList(votermean)); System.out.println("The mean voter is: " + votermean); mean.coalesce(1).saveAsTextFile(outputPath); sc.stop(); } }