Java examples for Big Data: Apache Flink
Uses the Apache Flink batch processing engine with the Sacramento Police Department open dataset.
import org.apache.flink.api.common.functions.GroupReduceFunction; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.tuple.Tuple1; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.util.Collector; public final class CrimeType { public static void main(final String[] args) throws Exception { final String filename; try {/* ww w.j av a 2 s .com*/ // access the arguments of the command line tool final ParameterTool params = ParameterTool.fromArgs(args); if (!params.has("filename")) { filename = "/tmp/crime.csv"; System.err .println("No filename specified. Please run 'CrimeType " + "--filename <filename>, where filename is the name of the dataset in CSV format"); } else { filename = params.get("filename"); } } catch (Exception ex) { System.err .println("No filename specified. Please run 'CrimeDistrict " + "--filename <filename>, where filename is the name of the dataset in CSV format"); return; } final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); final DataSet<Tuple1<String>> rawdata = env.readCsvFile(filename) .includeFields("01").ignoreFirstLine() .parseQuotedStrings('"').types(String.class); rawdata.groupBy(0).reduceGroup(new CrimeCounter()) .print(); } private final static class CrimeCounter implements GroupReduceFunction<Tuple1<String>, Tuple2<Integer, Integer>> { public void reduce(Iterable<Tuple1<String>> records, Collector<Tuple2<Integer, Integer>> out) throws Exception { String offense = null; int cnt = 0; for (Tuple1<String> m : records) { offense = m.f0; cnt++; } out.collect(new Tuple2<Integer, Integer>(Integer .parseInt(offense), cnt)); } } }