Apache Flink batch processing engine using the Sacramento Police Department open dataset - Java Big Data

Java examples for Big Data: Apache Flink

Description

A Flink batch job that reads the Sacramento Police Department open crime dataset from a CSV file and counts the number of records for each crime type.

Demo Code


import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.util.Collector;


public final class CrimeType {


    public static void main(final String[] args) throws Exception {

        final String filename;
        try {
            // Access the arguments of the command line tool
            final ParameterTool params = ParameterTool.fromArgs(args);
            if (!params.has("filename")) {
                filename = "/tmp/crime.csv";
                System.err
                        .println("No filename specified; defaulting to " + filename
                                + ". Run 'CrimeType --filename <filename>', where filename is the name of the dataset in CSV format.");
            } else {
                filename = params.get("filename");
            }

        } catch (Exception ex) {
            System.err
                    .println("Could not parse the arguments. Run 'CrimeType "
                            + "--filename <filename>', where filename is the name of the dataset in CSV format.");
            return;
        }

        final ExecutionEnvironment env = ExecutionEnvironment
                .getExecutionEnvironment();

        // Read only the offense column (field mask "01"), skipping the CSV header
        // and honoring quoted string values.
        final DataSet<Tuple1<String>> rawdata = env.readCsvFile(filename)
                .includeFields("01").ignoreFirstLine()
                .parseQuotedStrings('"').types(String.class);

        // Group the records by offense and count the occurrences of each crime type.
        rawdata.groupBy(0).reduceGroup(new CrimeCounter())
                .print();
    }

    private static final class CrimeCounter implements
            GroupReduceFunction<Tuple1<String>, Tuple2<String, Integer>> {

        @Override
        public void reduce(Iterable<Tuple1<String>> records,
                Collector<Tuple2<String, Integer>> out) throws Exception {

            // All records in the group share the same offense; count them.
            String offense = null;
            int cnt = 0;
            for (Tuple1<String> m : records) {
                offense = m.f0;
                cnt++;
            }
            // Emit the crime type together with its count.
            out.collect(new Tuple2<String, Integer>(offense, cnt));
        }
    }

}
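
For comparison, the same per-type count can be expressed without a custom GroupReduceFunction by pairing each offense with a count of 1 and using Flink's built-in groupBy/sum aggregation on the DataSet API. The following is a minimal sketch; the class name CrimeTypeSum, the hard-coded /tmp/crime.csv path, and the "01" field mask are assumptions carried over from the demo above.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;

public final class CrimeTypeSum {

    public static void main(final String[] args) throws Exception {

        final ExecutionEnvironment env = ExecutionEnvironment
                .getExecutionEnvironment();

        // Same CSV read as in the demo above; the path and field mask are assumptions.
        final DataSet<Tuple1<String>> rawdata = env.readCsvFile("/tmp/crime.csv")
                .includeFields("01").ignoreFirstLine()
                .parseQuotedStrings('"').types(String.class);

        rawdata
                // Pair each offense with a count of 1 ...
                .map(new MapFunction<Tuple1<String>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(final Tuple1<String> record) {
                        return new Tuple2<String, Integer>(record.f0, 1);
                    }
                })
                // ... then group by the offense and sum the counts per group.
                .groupBy(0)
                .sum(1)
                .print();
    }
}

Letting groupBy(0).sum(1) perform the aggregation keeps the per-group counting inside Flink instead of iterating over each group in user code.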
