Java tutorial: querying packet captures with Apache Metron's PcapJob

The class below, org.apache.metron.pcap.mr.PcapJob, drives a Hadoop MapReduce job that scans packet-capture (pcap) sequence files in HDFS, keeps only the packets that fall inside a requested time range and match a configurable filter, and hands the results back as a lazily-read iterable over the job's output sequence files.
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.pcap.mr;

import static org.apache.metron.pcap.PcapHelper.greaterThanOrEqualTo;
import static org.apache.metron.pcap.PcapHelper.lessThanOrEqualTo;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.UUID;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.metron.common.hadoop.SequenceFileIterable;
import org.apache.metron.pcap.PacketInfo;
import org.apache.metron.pcap.PcapHelper;
import org.apache.metron.pcap.filter.PcapFilter;
import org.apache.metron.pcap.filter.PcapFilterConfigurator;
import org.apache.metron.pcap.filter.PcapFilters;
import org.apache.metron.pcap.utils.FileFilterUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Encapsulates the MapReduce job that extracts packets matching a filter from pcap data
 * stored as sequence files in HDFS, keyed by nanosecond timestamp.
 */
public class PcapJob {
  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static final String START_TS_CONF = "start_ts";
  public static final String END_TS_CONF = "end_ts";
  public static final String WIDTH_CONF = "width";

  public static enum PCAP_COUNTER {
    MALFORMED_PACKET_COUNT
  }

  /**
   * Partitions records by timestamp: each reducer receives a contiguous slice of the
   * [start, end] range, so the sorted reducer outputs are globally ordered by time.
   */
  public static class PcapPartitioner extends Partitioner<LongWritable, BytesWritable> implements Configurable {
    private Configuration configuration;
    Long start = null;
    Long end = null;
    Long width = null;

    @Override
    public int getPartition(LongWritable longWritable, BytesWritable bytesWritable, int numPartitions) {
      if (start == null) {
        initialize();
      }
      long x = longWritable.get();
      int ret = (int) Long.divideUnsigned(x - start, width);
      if (ret > numPartitions) {
        throw new IllegalArgumentException(
            String.format("Bad partition: key=%s, width=%d, partition=%d, numPartitions=%d",
                Long.toUnsignedString(x), width, ret, numPartitions));
      }
      return ret;
    }

    private void initialize() {
      start = Long.parseUnsignedLong(configuration.get(START_TS_CONF));
      end = Long.parseUnsignedLong(configuration.get(END_TS_CONF));
      width = Long.parseLong(configuration.get(WIDTH_CONF));
    }

    @Override
    public void setConf(Configuration conf) {
      this.configuration = conf;
    }

    @Override
    public Configuration getConf() {
      return configuration;
    }
  }

  /**
   * Emits a record only if its timestamp falls within [start, end] and at least one
   * contained packet passes the configured PcapFilter.
   */
  public static class PcapMapper extends Mapper<LongWritable, BytesWritable, LongWritable, BytesWritable> {
    PcapFilter filter;
    long start;
    long end;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
      super.setup(context);
      filter = PcapFilters
          .valueOf(context.getConfiguration().get(PcapFilterConfigurator.PCAP_FILTER_NAME_CONF)).create();
      filter.configure(context.getConfiguration());
      start = Long.parseUnsignedLong(context.getConfiguration().get(START_TS_CONF));
      end = Long.parseUnsignedLong(context.getConfiguration().get(END_TS_CONF));
    }

    @Override
    protected void map(LongWritable key, BytesWritable value, Context context) throws IOException, InterruptedException {
      if (greaterThanOrEqualTo(key.get(), start) && lessThanOrEqualTo(key.get(), end)) {
        // It is assumed that the passed BytesWritable value is always a *single* PacketInfo object. Passing more than 1
        // object will result in the whole set being passed through if any pass the filter. We cannot serialize PacketInfo
        // objects back to byte arrays, otherwise we could support more than one packet.
        // Note: short-circuit findAny() func on stream
        List<PacketInfo> packetInfos;
        try {
          packetInfos = PcapHelper.toPacketInfo(value.copyBytes());
        } catch (Exception e) {
          // toPacketInfo is throwing RuntimeExceptions. Attempt to catch and count errors with malformed packets
          context.getCounter(PCAP_COUNTER.MALFORMED_PACKET_COUNT).increment(1);
          return;
        }
        boolean send = filteredPacketInfo(packetInfos).findAny().isPresent();
        if (send) {
          context.write(key, value);
        }
      }
    }

    private Stream<PacketInfo> filteredPacketInfo(List<PacketInfo> packetInfos) throws IOException {
      return packetInfos.stream().filter(filter);
    }
  }

  /**
   * Identity reducer: writes the matching packets back out, sorted by timestamp within each partition.
   */
  public static class PcapReducer extends Reducer<LongWritable, BytesWritable, LongWritable, BytesWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException {
      for (BytesWritable value : values) {
        context.write(key, value);
      }
    }
  }

  /**
   * Returns a lazily-read Iterable over a set of sequence files
   */
  private SequenceFileIterable readResults(Path outputPath, Configuration config, FileSystem fs) throws IOException {
    List<Path> files = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext(); ) {
      Path p = it.next().getPath();
      if (p.getName().equals("_SUCCESS")) {
        fs.delete(p, false);
        continue;
      }
      files.add(p);
    }
    LOG.debug("Output path={}", outputPath);
    Collections.sort(files, (o1, o2) -> o1.getName().compareTo(o2.getName()));
    return new SequenceFileIterable(files, config);
  }

  /**
   * Creates and runs the MapReduce job for the given time range and filter, then returns a
   * lazy iterable over the matching packets. Timestamps are nanoseconds since the epoch.
   */
  public <T> SequenceFileIterable query(Path basePath, Path baseOutputPath, long beginNS, long endNS, int numReducers,
      T fields, Configuration conf, FileSystem fs, PcapFilterConfigurator<T> filterImpl)
      throws IOException, ClassNotFoundException, InterruptedException {
    String fileName = Joiner.on("_").join(beginNS, endNS, filterImpl.queryToString(fields), UUID.randomUUID().toString());
    if (LOG.isDebugEnabled()) {
      DateFormat format = SimpleDateFormat.getDateTimeInstance(SimpleDateFormat.LONG, SimpleDateFormat.LONG);
      String from = format.format(new Date(Long.divideUnsigned(beginNS, 1000000)));
      String to = format.format(new Date(Long.divideUnsigned(endNS, 1000000)));
      LOG.debug("Executing query {} on timerange from {} to {}", filterImpl.queryToString(fields), from, to);
    }
    Path outputPath = new Path(baseOutputPath, fileName);
    Job job = createJob(basePath, outputPath, beginNS, endNS, numReducers, fields, conf, fs, filterImpl);
    if (job == null) {
      LOG.info("No files to process with specified date range.");
      return new SequenceFileIterable(new ArrayList<>(), conf);
    }
    boolean completed = job.waitForCompletion(true);
    if (completed) {
      return readResults(outputPath, conf, fs);
    } else {
      throw new RuntimeException(
          "Unable to complete query due to errors. Please check logs for full errors.");
    }
  }

  /**
   * Width of the timestamp slice handled by each reducer. The +1 guarantees that the last key
   * in the range (end) maps to a partition index strictly less than numReducers.
   */
  public static long findWidth(long start, long end, int numReducers) {
    return Long.divideUnsigned(end - start, numReducers) + 1;
  }

  /**
   * Configures the MapReduce job: pushes the time range, slice width, and filter into the job
   * configuration and restricts the input to the pcap sequence files that overlap the time range.
   * Returns null if no input files match.
   */
  public <T> Job createJob(Path basePath, Path outputPath, long beginNS, long endNS, int numReducers, T fields,
      Configuration conf, FileSystem fs, PcapFilterConfigurator<T> filterImpl) throws IOException {
    conf.set(START_TS_CONF, Long.toUnsignedString(beginNS));
    conf.set(END_TS_CONF, Long.toUnsignedString(endNS));
    conf.set(WIDTH_CONF, "" + findWidth(beginNS, endNS, numReducers));
    filterImpl.addToConfig(fields, conf);
    Job job = Job.getInstance(conf);
    job.setJarByClass(PcapJob.class);
    job.setMapperClass(PcapJob.PcapMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(numReducers);
    job.setReducerClass(PcapReducer.class);
    job.setPartitionerClass(PcapPartitioner.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    Iterable<String> filteredPaths = FileFilterUtil.getPathsInTimeRange(beginNS, endNS, listFiles(fs, basePath));
    String inputPaths = Joiner.on(',').join(filteredPaths);
    if (StringUtils.isEmpty(inputPaths)) {
      return null;
    }
    SequenceFileInputFormat.addInputPaths(job, inputPaths);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    return job;
  }

  protected Iterable<Path> listFiles(FileSystem fs, Path basePath) throws IOException {
    List<Path> ret = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> filesIt = fs.listFiles(basePath, true);
    while (filesIt.hasNext()) {
      ret.add(filesIt.next().getPath());
    }
    return ret;
  }
}
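To make the entry point concrete, here is a rough sketch of a driver that calls PcapJob.query(). Everything outside the query() call itself is an assumption for illustration: the HDFS paths, the ip_src_addr field, and the use of Metron's FixedPcapFilter.Configurator as the PcapFilterConfigurator implementation are placeholders, so substitute whatever filter configurator and directories your deployment actually uses.

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.metron.common.hadoop.SequenceFileIterable;
    import org.apache.metron.pcap.filter.fixed.FixedPcapFilter;  // assumed fixed-filter configurator from Metron
    import org.apache.metron.pcap.mr.PcapJob;

    public class PcapQueryExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Illustrative field map for the assumed fixed filter (match on source IP).
        Map<String, String> fields = new HashMap<>();
        fields.put("ip_src_addr", "192.168.1.1");

        // Timestamps are nanoseconds since the epoch; query the last hour.
        long endNS = System.currentTimeMillis() * 1_000_000L;
        long beginNS = endNS - 3_600L * 1_000_000_000L;

        SequenceFileIterable results = new PcapJob().query(
            new Path("/apps/metron/pcap"),   // assumed input directory of pcap sequence files
            new Path("/tmp/pcap-query"),     // assumed base output directory
            beginNS, endNS,
            10,                              // number of reducers, i.e. time slices
            fields, conf, fs,
            new FixedPcapFilter.Configurator());

        // The result is read lazily from the reducer output files (see readResults above).
        for (Object record : results) {
          System.out.println("matched record: " + record);
        }
      }
    }

Because query() blocks on job.waitForCompletion(true), the call returns only after the MapReduce job has finished; only the reading of the output sequence files is deferred.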
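The interplay between findWidth and PcapPartitioner is easy to miss: each reducer handles a contiguous slice of the timestamp range, and the "+ 1" in findWidth keeps the final timestamp (end) from landing on a partition index equal to numReducers. The standalone snippet below repeats the same arithmetic with made-up numbers; it has no Hadoop dependency and is only a sketch of the calculation, not part of the class above.

    public class WidthDemo {

      // Mirrors PcapJob.findWidth: the slice of the timestamp range assigned to each reducer.
      static long findWidth(long start, long end, int numReducers) {
        return Long.divideUnsigned(end - start, numReducers) + 1;
      }

      public static void main(String[] args) {
        long start = 1_000_000_000L;   // illustrative begin timestamp (nanoseconds)
        long end = start + 100;        // illustrative end timestamp
        int numReducers = 10;

        long width = findWidth(start, end, numReducers);   // 100 / 10 + 1 = 11

        // Mirrors PcapPartitioner.getPartition: every key in [start, end] stays below numReducers.
        for (long key : new long[] {start, start + 55, end}) {
          int partition = (int) Long.divideUnsigned(key - start, width);
          System.out.printf("offset %d -> partition %d%n", key - start, partition);
        }
      }
    }

With a width of 10 instead of 11, the key at offset 100 would compute partition 10, one past the last valid reducer index; the extra 1 keeps the boundary key in partition 9.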