nl.sidn.pcap.parquet.DNSParquetPacketWriter.java Source code

Java tutorial

Introduction

Here is the source code for nl.sidn.pcap.parquet.DNSParquetPacketWriter.java

Source

/*
 * ENTRADA, a big data platform for network data analytics
 *
 * Copyright (C) 2016 SIDN [https://www.sidn.nl]
 * 
 * This file is part of ENTRADA.
 * 
 * ENTRADA is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * ENTRADA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with ENTRADA.  If not, see [<http://www.gnu.org/licenses/].
 *
 */
package nl.sidn.pcap.parquet;

import java.sql.Timestamp;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.commons.lang3.StringUtils;
import org.kitesdk.data.PartitionStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Joiner;

import nl.sidn.dnslib.message.Header;
import nl.sidn.dnslib.message.Message;
import nl.sidn.dnslib.message.Question;
import nl.sidn.dnslib.message.records.edns0.ClientSubnetOption;
import nl.sidn.dnslib.message.records.edns0.DNSSECOption;
import nl.sidn.dnslib.message.records.edns0.EDNS0Option;
import nl.sidn.dnslib.message.records.edns0.KeyTagOption;
import nl.sidn.dnslib.message.records.edns0.NSidOption;
import nl.sidn.dnslib.message.records.edns0.OPTResourceRecord;
import nl.sidn.dnslib.message.records.edns0.PaddingOption;
import nl.sidn.dnslib.message.records.edns0.PingOption;
import nl.sidn.dnslib.types.OpcodeType;
import nl.sidn.dnslib.types.RcodeType;
import nl.sidn.dnslib.types.ResourceRecordType;
import nl.sidn.dnslib.util.Domaininfo;
import nl.sidn.dnslib.util.IPUtil;
import nl.sidn.dnslib.util.NameUtil;
import nl.sidn.pcap.PcapReader;
import nl.sidn.pcap.ip.GoogleResolverCheck;
import nl.sidn.pcap.ip.OpenDNSResolverCheck;
import nl.sidn.pcap.packet.Packet;
import nl.sidn.pcap.support.PacketCombination;
import nl.sidn.pcap.util.Settings;
import nl.sidn.stats.MetricManager;

public class DNSParquetPacketWriter extends AbstractParquetPacketWriter {

    private static final Logger LOGGER = LoggerFactory.getLogger(DNSParquetPacketWriter.class);

    private static final int RCODE_QUERY_WITHOUT_RESPONSE = -1;

    //metrics
    private Set<String> domainnames = new HashSet<>();
    private long responseBytes = 0;
    private long requestBytes = 0;
    private Map<Integer, Integer> qtypes = new HashMap<>();
    private Map<Integer, Integer> rcodes = new HashMap<>();
    private Map<Integer, Integer> opcodes = new HashMap<>();
    private int requestUDPFragmentedCount = 0;
    private int requestTCPFragmentedCount = 0;
    private int responseUDPFragmentedCount = 0;
    private int responseTCPFragmentedCount = 0;
    private int ipv4QueryCount = 0;
    private int ipv6QueryCount = 0;

    private MetricManager metricManager;
    private GoogleResolverCheck googleCheck = new GoogleResolverCheck();
    private OpenDNSResolverCheck openDNSCheck = new OpenDNSResolverCheck();

    public DNSParquetPacketWriter(String repoName, String schema) {
        super(repoName, schema);
        metricManager = MetricManager.getInstance();
    }

    private Question lookupQuestion(Message reqMessage, Message respMessage) {
        if (reqMessage != null && reqMessage.getQuestions().size() > 0) {
            return reqMessage.getQuestions().get(0);
        } else if (respMessage != null && respMessage.getQuestions().size() > 0) {
            return respMessage.getQuestions().get(0);
        }
        //should never get here
        return null;
    }

    //   private Header lookupHeader(Message reqMessage, Message respMessage){
    //      if(reqMessage != null){
    //         return reqMessage.getHeader();
    //      }else if(respMessage != null){
    //         return respMessage.getHeader();
    //      }
    //      //should never get here
    //      return null;
    //   }

    private long lookupTime(Packet reqPacket, Packet respPacket) {
        if (reqPacket != null) {
            return reqPacket.getTs();
        } else if (respPacket != null) {
            return respPacket.getTs();
        }
        //should never get here
        return -1;
    }

    /**
     * create 1 parquet record which combines values from the query and the response
     * @param packet
     */
    @Override
    public void write(PacketCombination combo) {

        GenericRecordBuilder builder = newBuilder();

        packetCounter++;
        if (packetCounter % STATUS_COUNT == 0) {
            showStatus();
        }
        Packet reqTransport = combo.getRequest();
        Message requestMessage = combo.getRequestMessage();
        Packet respTransport = combo.getResponse();
        Message respMessage = combo.getResponseMessage();

        Question q = lookupQuestion(requestMessage, respMessage);

        Header requestHeader = null; //lookupHeader(requestMessage, respMessage);
        if (requestMessage != null) {
            requestHeader = requestMessage.getHeader();
        }

        //get the time in milliseconds
        long time = lookupTime(reqTransport, respTransport);
        Timestamp ts = new Timestamp((time * 1000));

        String normalizedQname = q == null ? "" : filter(q.getqName());
        normalizedQname = StringUtils.lowerCase(normalizedQname);
        Domaininfo domaininfo = NameUtil.getDomain(normalizedQname, Settings.getTldSuffixes());
        //check to see it a response was found, if not then save -1 value
        //otherwise use the rcode returned by the server in the response.
        //no response might be caused by rate limiting
        int rcode = RCODE_QUERY_WITHOUT_RESPONSE; //default no reply, use non standard rcode value -1

        //set the nameserver the queries are going to/coming from
        builder.set("svr", combo.getServer().getName());

        //if no anycast location is encoded in the name then the anycast location will be null
        builder.set("server_location", combo.getServer().getLocation());

        //add meta data
        enrich(reqTransport, respTransport, builder);

        //these are the values that are retrieved from the response
        if (respTransport != null && respMessage != null) {
            Header respHdr = respMessage.getHeader();
            rcode = respHdr.getRawRcode();

            builder.set("aa", respHdr.isAa()).set("tc", respHdr.isTc()).set("ra", respHdr.isRa())
                    .set("ad", respHdr.isAd()).set("ancount", (int) respHdr.getAnCount())
                    .set("arcount", (int) respHdr.getArCount()).set("nscount", (int) respHdr.getNsCount())
                    .set("res_len", respTransport.getTotalLength()).set("dns_res_len", respMessage.getBytes());

            //add file name
            builder.set("pcap_file", combo.getPcapFilename());

            //ip fragments in the response
            if (respTransport.isFragmented()) {
                int frags = respTransport.getReassembledFragments();
                builder.set("resp_frag", frags);

                if ((respTransport.getProtocol() == PcapReader.PROTOCOL_UDP) && frags > 1) {
                    responseUDPFragmentedCount++;
                } else if ((respTransport.getProtocol() == PcapReader.PROTOCOL_TCP) && frags > 1) {
                    responseTCPFragmentedCount++;
                }
            }

            //EDNS0 for response
            writeResponseOptions(respMessage, builder);

            //update metric
            responseBytes = responseBytes + respTransport.getUdpLength();
            if (!combo.isExpired()) {
                //do not send expired queries, this will cause duplicate timestamps with low values
                //this looks like dips in the grafana graph
                metricManager.sendAggregated(MetricManager.METRIC_IMPORT_DNS_RESPONSE_COUNT, 1, time);
            }

            //check if we have a request hdr if not continue using values
            //from the response header
            Header headerToUse = requestHeader;
            if (headerToUse == null) {
                headerToUse = respHdr;
            } else {
                //get these values only from the req header
                //if no request is found these will be null
                builder.set("q_tc", headerToUse.isTc()).set("q_ra", headerToUse.isRa())
                        .set("q_ad", headerToUse.isAd()).set("q_rcode", headerToUse.getRawRcode());
            }
            //get these values from either the req or resp header
            builder.set("id", headerToUse.getId()).set("opcode", headerToUse.getRawOpcode())
                    .set("rd", headerToUse.isRd()).set("z", headerToUse.isZ()).set("cd", headerToUse.isCd())
                    .set("qdcount", (int) headerToUse.getQdCount());

            updateMetricMap(opcodes, headerToUse.getRawOpcode());

        }

        //values from request now, if no request found then use parts of the response.
        builder.set("rcode", rcode)
                .set("unixtime", reqTransport != null ? reqTransport.getTs() : respTransport.getTs())
                .set("time", ts.getTime())
                .set("time_micro", reqTransport != null ? reqTransport.getTsmicros() : respTransport.getTsmicros())
                .set("qname", normalizedQname).set("domainname", domaininfo.name).set("labels", domaininfo.labels)
                .set("src", reqTransport != null ? reqTransport.getSrc() : respTransport.getDst())
                .set("len", reqTransport != null ? reqTransport.getTotalLength() : null)
                .set("ttl", reqTransport != null ? reqTransport.getTtl() : null)
                .set("ipv",
                        reqTransport != null ? (int) reqTransport.getIpVersion()
                                : (int) respTransport.getIpVersion())
                .set("prot",
                        reqTransport != null ? (int) reqTransport.getProtocol() : (int) respTransport.getProtocol())
                .set("srcp", reqTransport != null ? reqTransport.getSrcPort() : null)
                .set("dst", reqTransport != null ? reqTransport.getDst() : respTransport.getSrc())
                .set("dstp", reqTransport != null ? reqTransport.getDstPort() : respTransport.getSrcPort())
                .set("udp_sum", reqTransport != null ? reqTransport.getUdpsum() : null)
                .set("dns_len", requestMessage != null ? requestMessage.getBytes() : null);

        if (reqTransport != null) {

            //ip fragments in the request
            if (reqTransport.isFragmented()) {
                int req_frags = reqTransport.getReassembledFragments();
                builder.set("frag", req_frags);

                if ((reqTransport.getProtocol() == PcapReader.PROTOCOL_UDP) && req_frags > 1) {
                    requestUDPFragmentedCount++;
                } else if ((reqTransport.getProtocol() == PcapReader.PROTOCOL_TCP) && req_frags > 1) {
                    requestTCPFragmentedCount++;
                }
            }

            //update metrics
            requestBytes = requestBytes + reqTransport.getUdpLength();
            if (!combo.isExpired()) {
                //do not send expired queries, this will cause duplicate timestamps with low values
                //this looks like dips in the grafana graph
                metricManager.sendAggregated(MetricManager.METRIC_IMPORT_DNS_QUERY_COUNT, 1, time);
            }
        }

        if (rcode == RCODE_QUERY_WITHOUT_RESPONSE) {
            //no response found for query, update stats
            metricManager.sendAggregated(MetricManager.METRIC_IMPORT_DNS_NO_RESPONSE_COUNT, 1, time);
        }

        //question
        writeQuestion(q, builder);

        //EDNS0 for request
        writeRequestOptions(requestMessage, builder);

        //calculate the processing time
        writeProctime(reqTransport, respTransport, builder);

        //create the actual record and write to parquet file
        GenericRecord record = builder.build();
        writer.write(record);

        //create metrics
        domainnames.add(domaininfo.name);
        updateMetricMap(rcodes, rcode);

        //ip version stats
        updateIpVersionMetrics(reqTransport, respTransport);

        //if packet was expired and dropped from cache then increase stats for this
        if (combo.isExpired()) {
            metricManager.sendAggregated(MetricManager.METRIC_IMPORT_CACHE_EXPPIRED_DNS_QUERY_COUNT, 1, time,
                    false);
        }
    }

    private void enrich(Packet reqPacket, Packet respPacket, GenericRecordBuilder builder) {
        String country = null;
        String asn = null;
        boolean isGoogle = false;
        boolean isOpenDNS = false;
        if (reqPacket != null) {
            //request packet, check the source address
            country = getCountry(reqPacket.getSrc());
            asn = getAsn(reqPacket.getSrc(), reqPacket.getIpVersion() == 4);
            isGoogle = googleCheck.isMatch(reqPacket.getSrc());
            if (!isGoogle) {
                isOpenDNS = openDNSCheck.isMatch(reqPacket.getSrc());
            }
        } else {
            //response packet, check the destination address
            country = getCountry(respPacket.getDst());
            asn = getAsn(respPacket.getDst(), respPacket.getIpVersion() == 4);
            isGoogle = googleCheck.isMatch(respPacket.getDst());
            if (!isGoogle) {
                isOpenDNS = openDNSCheck.isMatch(respPacket.getDst());
            }
        }

        builder.set("country", country).set("asn", asn)
                //check of the resolver is a google backend resolver
                .set("is_google", isGoogle)
                //check of the resolver is a opendns backend resolver
                .set("is_opendns", isOpenDNS);
    }

    private void updateIpVersionMetrics(Packet req, Packet resp) {
        if (req != null) {
            if (req.getIpVersion() == 4) {
                ipv4QueryCount++;
            } else {
                ipv6QueryCount++;
            }
        } else {
            if (resp.getIpVersion() == 4) {
                ipv4QueryCount++;
            } else {
                ipv6QueryCount++;
            }
        }
    }

    private void writeQuestion(Question q, GenericRecordBuilder builder) {
        if (q != null) {
            //unassigned, private or unknown, get raw value
            builder.set("qtype", q.getqTypeValue());
            //unassigned, private or unknown, get raw value
            builder.set("qclass", q.getqClassValue());
            //qtype metrics
            updateMetricMap(qtypes, q.getqTypeValue());
        }
    }

    //calc the number of seconds between receivinfg the response and sending it back to the resolver
    private void writeProctime(Packet reqTransport, Packet respTransport, GenericRecordBuilder builder) {
        if (reqTransport != null && respTransport != null) {
            Timestamp reqTs = new Timestamp((reqTransport.getTs() * 1000000));
            Timestamp respTs = new Timestamp((respTransport.getTs() * 1000000));

            //from second to microseconds
            long millis1 = respTs.getTime() - reqTs.getTime();
            long millis2 = (respTransport.getTsmicros() - reqTransport.getTsmicros());
            builder.set("proc_time", millis1 + millis2);
        }
    }

    /**
     * Write EDNS0 option (if any are present) to file. 
     * @param message
     * @param builder
     */
    private void writeResponseOptions(Message message, GenericRecordBuilder builder) {
        if (message == null) {
            return;
        }

        OPTResourceRecord opt = message.getPseudo();
        if (opt != null) {
            for (EDNS0Option option : opt.getOptions()) {
                if (option instanceof NSidOption) {
                    String id = ((NSidOption) option).getId();
                    builder.set("edns_nsid", id != null ? id : "");

                    //this is the only server edns data we support, stop processing other options
                    break;
                }
            }
        }

    }

    /**
     * Write EDNS0 option (if any are present) to file. 
     * @param message
     * @param builder
     */
    private void writeRequestOptions(Message message, GenericRecordBuilder builder) {
        if (message == null) {
            return;
        }

        OPTResourceRecord opt = message.getPseudo();
        if (opt != null) {
            builder.set("edns_udp", (int) opt.getUdpPlayloadSize()).set("edns_version", (int) opt.getVersion())
                    .set("edns_do", opt.getDnssecDo()).set("edns_padding", -1); //use default no padding found

            String other = null;
            for (EDNS0Option option : opt.getOptions()) {
                if (option instanceof PingOption) {
                    builder.set("edns_ping", true);
                } else if (option instanceof DNSSECOption) {
                    if (option.getCode() == DNSSECOption.OPTION_CODE_DAU) {
                        builder.set("edns_dnssec_dau", ((DNSSECOption) option).export());
                    } else if (option.getCode() == DNSSECOption.OPTION_CODE_DHU) {
                        builder.set("edns_dnssec_dhu", ((DNSSECOption) option).export());
                    } else { //N3U
                        builder.set("edns_dnssec_n3u", ((DNSSECOption) option).export());
                    }
                } else if (option instanceof ClientSubnetOption) {
                    ClientSubnetOption scOption = (ClientSubnetOption) option;
                    //get client country and asn
                    String clientCountry = null;
                    String clientASN = null;
                    if (scOption.getAddress() != null) {
                        if (scOption.isIPv4()) {
                            try {
                                byte[] addrBytes = IPUtil.ipv4tobytes(scOption.getAddress());
                                clientCountry = geoLookup.lookupCountry(addrBytes);
                                clientASN = geoLookup.lookupASN(addrBytes, true);
                            } catch (Exception e) {
                                LOGGER.error("Could not convert IPv4 addr to bytes, invalid address? :"
                                        + scOption.getAddress());
                            }
                        } else {
                            try {
                                byte[] addrBytes = IPUtil.ipv6tobytes(scOption.getAddress());
                                clientCountry = geoLookup.lookupCountry(addrBytes);
                                clientASN = geoLookup.lookupASN(addrBytes, false);
                            } catch (Exception e) {
                                LOGGER.error("Could not convert IPv6 addr to bytes, invalid address? :"
                                        + scOption.getAddress());
                            }
                        }
                    }
                    builder.set("edns_client_subnet", scOption.export()).set("edns_client_subnet_asn", clientASN)
                            .set("edns_client_subnet_country", clientCountry);

                } else if (option instanceof PaddingOption) {
                    builder.set("edns_padding", ((PaddingOption) option).getLength());
                } else if (option instanceof KeyTagOption) {
                    KeyTagOption kto = (KeyTagOption) option;
                    builder.set("edns_keytag_count", kto.getKeytags().size());
                    builder.set("edns_keytag_list", Joiner.on(",").join(kto.getKeytags()));
                } else {
                    //other
                    if (StringUtils.length(other) > 0) {
                        other = other + "," + option.getCode();
                    } else {
                        other = String.valueOf(option.getCode());
                    }
                }
            }

            if (other != null) {
                builder.set("edns_other", other);
            }
        }
    }

    @Override
    protected PartitionStrategy createPartitionStrategy() {
        return new PartitionStrategy.Builder().year("time").month("time").day("time").identity("svr", "server")
                .build();
    }

    @Override
    public void writeMetrics() {

        for (Integer key : rcodes.keySet()) {
            Integer value = rcodes.get(key);
            if (key.intValue() == -1) {
                //pseudo rcode -1 means no reponse, not an official rcode
                metricManager.send(MetricManager.METRIC_IMPORT_DNS_RCODE + ".NO_RESPONSE.count", value.intValue());
            } else {
                RcodeType type = RcodeType.fromValue(key.intValue());
                metricManager.send(
                        MetricManager.METRIC_IMPORT_DNS_RCODE + StringUtils.upperCase("." + type.name()) + ".count",
                        value.intValue());
            }
        }

        metricManager.send(MetricManager.METRIC_IMPORT_IP_COUNT, geo_ip_cache.size());
        metricManager.send(MetricManager.METRIC_IMPORT_COUNTRY_COUNT, countries.size());
        metricManager.send(MetricManager.METRIC_IMPORT_ASN_COUNT, asn_cache.size());

        metricManager.send(MetricManager.METRIC_IMPORT_DNS_DOMAINNAME_COUNT, domainnames.size());
        if (responseBytes > 0) {
            metricManager.send(MetricManager.METRIC_IMPORT_DNS_RESPONSE_BYTES_SIZE, (int) (responseBytes / 1024));
        } else {
            metricManager.send(MetricManager.METRIC_IMPORT_DNS_RESPONSE_BYTES_SIZE, 0);
        }
        if (requestBytes > 0) {
            metricManager.send(MetricManager.METRIC_IMPORT_DNS_QUERY_BYTES_SIZE, (int) (requestBytes / 1024));
        } else {
            metricManager.send(MetricManager.METRIC_IMPORT_DNS_QUERY_BYTES_SIZE, 0);
        }

        for (Integer key : qtypes.keySet()) {
            Integer value = qtypes.get(key);
            ResourceRecordType type = ResourceRecordType.fromValue(key.intValue());
            metricManager.send(
                    MetricManager.METRIC_IMPORT_DNS_QTYPE + StringUtils.upperCase("." + type.name()) + ".count",
                    value.intValue());
        }

        for (Integer key : opcodes.keySet()) {
            Integer value = opcodes.get(key);
            OpcodeType type = OpcodeType.fromValue(key.intValue());
            metricManager.send(
                    MetricManager.METRIC_IMPORT_DNS_OPCODE + StringUtils.upperCase("." + type.name()) + ".count",
                    value.intValue());
        }

        metricManager.send(MetricManager.METRIC_IMPORT_UDP_REQUEST_FRAGMENTED_COUNT, requestUDPFragmentedCount);
        metricManager.send(MetricManager.METRIC_IMPORT_TCP_REQUEST_FRAGMENTED_COUNT, requestTCPFragmentedCount);
        metricManager.send(MetricManager.METRIC_IMPORT_UDP_RESPONSE_FRAGMENTED_COUNT, responseUDPFragmentedCount);
        metricManager.send(MetricManager.METRIC_IMPORT_TCP_RESPONSE_FRAGMENTED_COUNT, responseTCPFragmentedCount);

        metricManager.send(MetricManager.METRIC_IMPORT_IP_VERSION_4_COUNT, ipv4QueryCount);
        metricManager.send(MetricManager.METRIC_IMPORT_IP_VERSION_6_COUNT, ipv6QueryCount);

    }

}