Read and parse data with Apache Spark

Description

Demo Code


import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;
import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Spark job that averages the integer held in the second comma-separated field
 * of every input line.
 *
 * <p>Parentheses are stripped from each line before it is split, so a line such
 * as {@code (name,42)} contributes {@code 42}. The resulting mean is printed to
 * standard output and saved as a single-partition text output.
 */
public class Main {

  private static final Pattern COMMA = Pattern.compile(",");

  /** Matches the parentheses that are removed from a line before splitting it. */
  private static final Pattern PARENS = Pattern.compile("[()]");

  /**
   * Runs the job.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   * @throws IllegalStateException if the input contains no records
   * @throws Exception if reading, parsing, or writing fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      throw new IllegalArgumentException("Usage: Main <inputPath> <outputPath>");
    }
    String inputPath = args[0];
    String outputPath = args[1];

    // try-with-resources stops the context even when a step below throws.
    try (JavaSparkContext sc = new JavaSparkContext()) {
      // Read in file and parse data
      JavaRDD<Long> districtVoter = sc.textFile(inputPath).map(Main::parseVoterCount);
      // Two actions (count, reduce) run on this RDD; caching avoids re-reading
      // and re-parsing the input for the second one.
      districtVoter.cache();

      long count = districtVoter.count();
      if (count == 0) {
        // reduce() rejects an empty RDD and 0/0 has no meaningful mean,
        // so fail early with a message that names the input.
        throw new IllegalStateException("No records found in " + inputPath);
      }

      // Accumulate in long: an int total overflows silently on large inputs.
      long totalVoter = districtVoter.reduce(Long::sum);
      double votermean = ((double) totalVoter) / count;
      JavaRDD<Double> mean = sc.parallelize(Arrays.asList(votermean));

      System.out.println("The mean voter is: " + votermean);
      mean.coalesce(1).saveAsTextFile(outputPath);
    }
  }

  /**
   * Extracts the integer in the second comma-separated field of a line.
   *
   * @param line a raw input line; any parentheses in it are ignored
   * @return the parsed value, widened to {@code Long} so totals cannot overflow {@code int}
   * @throws IllegalArgumentException if the line has fewer than two fields
   * @throws NumberFormatException if the second field is not a valid {@code int}
   */
  private static Long parseVoterCount(String line) {
    String[] fields = COMMA.split(PARENS.matcher(line).replaceAll(""));
    if (fields.length < 2) {
      throw new IllegalArgumentException(
          "Expected at least two comma-separated fields but got: " + line);
    }
    // trim() tolerates whitespace around the number, e.g. "(name, 42)".
    return Long.valueOf(Integer.parseInt(fields[1].trim()));
  }
}

Read and parse data with Apache Spark - Java Big Data

Description

Demo Code

Related Tutorials