org.torproject.ernie.db.SanitizedBridgesWriter.java Source code

Introduction

Here is the source code for org.torproject.ernie.db.SanitizedBridgesWriter.java
Source

/* Copyright 2010 The Tor Project
 * See LICENSE for licensing information */
package org.torproject.ernie.db;

import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.logging.*;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.digest.*;
import org.apache.commons.codec.binary.*;

/**
 * Sanitizes bridge descriptors, i.e., removes all possibly sensitive
 * information from them, and writes them to a local directory structure.
 * During the sanitizing process, all information about the bridge
 * identity or IP address are removed or replaced. The goal is to keep the
 * sanitized bridge descriptors useful for statistical analysis while not
 * making it easier for an adversary to enumerate bridges.
 *
 * There are three types of bridge descriptors: bridge network statuses
 * (lists of all bridges at a given time), server descriptors (published
 * by the bridge to advertise their capabilities), and extra-info
 * descriptors (published by the bridge, mainly for statistical analysis).
 *
 * Network statuses, server descriptors, and extra-info descriptors are
 * linked via descriptor digests: extra-info descriptors are referenced
 * from server descriptors, and server descriptors are referenced from
 * network statuses. These references need to be changed during the
 * sanitizing process, because descriptor contents change and so do the
 * descriptor digests.
 *
 * No assumptions are made about the order in which bridge descriptors are
 * parsed. The approach taken here is to sanitize bridge descriptors even
 * with incomplete knowledge about references and to update them as soon
 * as these information get known. We are keeping a persistent data
 * structure, the bridge descriptor mapping, to hold information about
 * every single descriptor. The idea is that every descriptor is (a)
 * referenced from a network status and consists of (b) a server
 * descriptor and (c) an extra-info descriptor, both of which are
 * published at the same time. Using this data structure, we can repair
 * references as soon as we learn more about the descriptor and regardless
 * of the order of incoming bridge descriptors.
 *
 * The process of sanitizing a bridge descriptor is as follows, depending
 * on the type of descriptor:
 *
 * Network statuses are processed by sanitizing every r line separately
 * and looking up whether the descriptor mapping contains a bridge with
 * given identity hash and descriptor publication time. If so, the new
 * server descriptor identifier can be added. If not, we're adding all
 * 0's.
 *
 * While sanitizing a server descriptor, its identity hash and publication
 * time are looked up in order to put in the extra-info descriptor
 * identifier in case the corresponding extra-info descriptor was
 * sanitized before. Further, its publication time is noted down, so that
 * all network statuses that might be referencing this server descriptor
 * can be re-written at the end of the sanitizing procedure.
 *
 * Extra-info descriptors are processed by looking up their identity hash
 * and publication time in the descriptor mapping. If the corresponding
 * server descriptor was sanitized before, the server descriptor is
 * re-written to include the new extra-info descriptor digest, and the
 * publication time is noted down in order to re-write the network
 * statuses possibly referencing this extra-info descriptor and its
 * corresponding server descriptor at the end of the sanitizing process.
 *
 * After sanitizing all bridge descriptors, the network statuses that
 * might be referencing server descriptors which have been (re-)written
 * during this execution are re-written, too. This may be necessary in
 * order to update previously broken references to server descriptors.
 */
public class SanitizedBridgesWriter {

    /**
     * Hex representation of null reference that is written to bridge
     * descriptors if we don't have the real reference, yet.
     */
    private static final String NULL_REFERENCE = "0000000000000000000000000000000000000000";

    /**
     * Mapping between a descriptor as referenced from a network status to
     * the digests of server descriptor and extra-info descriptor.
     */
    private static class DescriptorMapping {

        /**
         * Creates a new mapping from comma-separated values as read from the
         * persistent mapping file.
         */
        private DescriptorMapping(String commaSeparatedValues) {
            String[] parts = commaSeparatedValues.split(",");
            this.hashedBridgeIdentity = parts[0];
            this.published = parts[1];
            this.serverDescriptorIdentifier = parts[2];
            this.extraInfoDescriptorIdentifier = parts[3];
        }

        /**
         * Creates a new mapping for a given identity hash and descriptor
         * publication time that has all 0's as descriptor digests.
         */
        private DescriptorMapping(String hashedBridgeIdentity, String published) {
            this.hashedBridgeIdentity = hashedBridgeIdentity;
            this.published = published;
            this.serverDescriptorIdentifier = NULL_REFERENCE;
            this.extraInfoDescriptorIdentifier = NULL_REFERENCE;
        }

        private String hashedBridgeIdentity;
        private String published;
        private String serverDescriptorIdentifier;
        private String extraInfoDescriptorIdentifier;

        /**
         * Returns a string representation of this descriptor mapping that can
         * be written to the persistent mapping file.
         */
        public String toString() {
            return this.hashedBridgeIdentity + "," + this.published + "," + this.serverDescriptorIdentifier + ","
                    + this.extraInfoDescriptorIdentifier;
        }
    }

    /**
     * File containing the mapping between network status entries, server
     * descriptors, and extra-info descriptors.
     */
    private File bridgeDescriptorMappingsFile;

    /**
     * Mapping between status entries, server descriptors, and extra-info
     * descriptors. This mapping is required to re-establish the references
     * from status entries to server descriptors and from server descriptors
     * to extra-info descriptors. The original references are broken when
     * sanitizing, because descriptor contents change and so do the
     * descriptor digests that are used for referencing. Map key contains
     * hashed bridge identity and descriptor publication time, map value
     * contains map key plus new server descriptor identifier and new
     * extra-info descriptor identifier.
     */
    private SortedMap<String, DescriptorMapping> bridgeDescriptorMappings;

    /**
     * Logger for this class.
     */
    private Logger logger;

    /**
     * Publication times of server descriptors and extra-info descriptors
     * parsed in the current execution. These times are used to determine
     * which statuses need to be rewritten at the end of the execution.
     */
    private SortedSet<String> descriptorPublicationTimes;

    /**
     * Output directory for writing sanitized bridge descriptors.
     */
    private String sanitizedBridgesDir;

    /**
     * Initializes this class, including reading in the known descriptor
     * mapping.
     */
    public SanitizedBridgesWriter(String dir) {

        /* Memorize argument values. */
        this.sanitizedBridgesDir = dir;

        /* Initialize logger. */
        this.logger = Logger.getLogger(SanitizedBridgesWriter.class.getName());

        /* Initialize data structure. */
        this.bridgeDescriptorMappings = new TreeMap<String, DescriptorMapping>();
        this.descriptorPublicationTimes = new TreeSet<String>();

        /* Read known descriptor mappings from disk. */
        this.bridgeDescriptorMappingsFile = new File("stats/bridge-descriptor-mappings");
        if (this.bridgeDescriptorMappingsFile.exists()) {
            try {
                BufferedReader br = new BufferedReader(new FileReader(this.bridgeDescriptorMappingsFile));
                String line = null;
                while ((line = br.readLine()) != null) {
                    if (line.split(",").length == 4) {
                        String[] parts = line.split(",");
                        DescriptorMapping dm = new DescriptorMapping(line);
                        dm.hashedBridgeIdentity = parts[0];
                        dm.published = parts[1];
                        dm.serverDescriptorIdentifier = parts[2];
                        dm.extraInfoDescriptorIdentifier = parts[3];
                        this.bridgeDescriptorMappings.put(line.split(",")[0] + "," + line.split(",")[1], dm);
                    } else {
                        this.logger.warning("Corrupt line '" + line + "' in "
                                + this.bridgeDescriptorMappingsFile.getAbsolutePath() + ". Skipping.");
                        continue;
                    }
                }
                br.close();
            } catch (IOException e) {
                this.logger.log(Level.WARNING,
                        "Could not read in " + this.bridgeDescriptorMappingsFile.getAbsolutePath() + ".");
                return;
            }
        }
    }

    /**
     * Sanitizes a network status and writes it to disk. Processes every r
     * line separately and looks up whether the descriptor mapping contains
     * a bridge with given identity hash and descriptor publication time. */
    public void sanitizeAndStoreNetworkStatus(byte[] data, String publicationTime) {

        /* Parse the given network status line by line. */
        StringBuilder scrubbed = new StringBuilder();
        try {
            BufferedReader br = new BufferedReader(new StringReader(new String(data, "US-ASCII")));
            String line = null;
            while ((line = br.readLine()) != null) {

                /* r lines contain sensitive information that needs to be removed
                 * or replaced. */
                if (line.startsWith("r ")) {

                    /* Parse the relevant parts of this r line. */
                    String[] parts = line.split(" ");
                    String bridgeIdentity = parts[2];
                    String descPublicationTime = parts[4] + " " + parts[5];
                    String orPort = parts[7];
                    String dirPort = parts[8];

                    /* Look up the descriptor in the descriptor mapping, or add a
                     * new mapping entry if there is none. */
                    String hashedBridgeIdentityHex = Hex
                            .encodeHexString(DigestUtils.sha(Base64.decodeBase64(bridgeIdentity + "==")))
                            .toLowerCase();
                    String mappingKey = hashedBridgeIdentityHex + "," + descPublicationTime;
                    DescriptorMapping mapping = null;
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentityHex.toLowerCase(), descPublicationTime);
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }

                    /* Write scrubbed r line to buffer. */
                    String hashedBridgeIdentityBase64 = Base64
                            .encodeBase64String(DigestUtils.sha(Base64.decodeBase64(bridgeIdentity + "==")))
                            .substring(0, 27);
                    String sdi = Base64
                            .encodeBase64String(Hex.decodeHex(mapping.serverDescriptorIdentifier.toCharArray()))
                            .substring(0, 27);
                    scrubbed.append("r Unnamed " + hashedBridgeIdentityBase64 + " " + sdi + " "
                            + descPublicationTime + " 127.0.0.1 " + orPort + " " + dirPort + "\n");

                    /* Nothing special about s lines; just copy them. */
                } else if (line.startsWith("s ")) {
                    scrubbed.append(line + "\n");

                    /* There should be nothing else but r and s lines in the network
                     * status. If there is, we should probably learn before writing
                     * anything to the sanitized descriptors. */
                } else {
                    this.logger.fine(
                            "Unknown line '" + line + "' in bridge " + "network status. Not writing to disk!");
                    return;
                }
            }
            br.close();

        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not parse bridge network " + "status.", e);
            return;
        } catch (DecoderException e) {
            this.logger.log(Level.WARNING, "Could not parse bridge network " + "status.", e);
            return;
        }

        /* Write the sanitized network status to disk. */
        try {

            /* Determine file name. */
            String syear = publicationTime.substring(0, 4);
            String smonth = publicationTime.substring(5, 7);
            String sday = publicationTime.substring(8, 10);
            String stime = publicationTime.substring(11, 13) + publicationTime.substring(14, 16)
                    + publicationTime.substring(17, 19);
            File statusFile = new File(this.sanitizedBridgesDir + "/" + syear + "/" + smonth + "/statuses/" + sday
                    + "/" + syear + smonth + sday + "-" + stime + "-" + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");

            /* Create all parent directories to write this network status. */
            statusFile.getParentFile().mkdirs();

            /* Write sanitized network status to disk. */
            BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
            bw.write(scrubbed.toString());
            bw.close();

        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not write sanitized bridge " + "network status to disk.", e);
            return;
        }
    }

    /**
     * Sanitizes a bridge server descriptor and writes it to disk. Looks up
     * up bridge identity hash and publication time in the descriptor
     * mapping. After sanitizing a server descriptor, its publication time
     * is noted down, so that all network statuses that might be referencing
     * this server descriptor can be re-written at the end of the sanitizing
     * procedure.
     */
    public void sanitizeAndStoreServerDescriptor(byte[] data) {

        /* Parse descriptor to generate a sanitized version and to look it up
         * in the descriptor mapping. */
        String scrubbedDesc = null;
        DescriptorMapping mapping = null;
        try {
            BufferedReader br = new BufferedReader(new StringReader(new String(data, "US-ASCII")));
            StringBuilder scrubbed = new StringBuilder();
            String line = null, hashedBridgeIdentity = null, published = null;
            boolean skipCrypto = false;
            while ((line = br.readLine()) != null) {

                /* When we have parsed both published and fingerprint line, look
                 * up descriptor in the descriptor mapping or create a new one if
                 * there is none. */
                if (mapping == null && published != null && hashedBridgeIdentity != null) {
                    String mappingKey = hashedBridgeIdentity + "," + published;
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentity, published);
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }
                }

                /* Skip all crypto parts that might be used to derive the bridge's
                 * identity fingerprint. */
                if (skipCrypto && !line.startsWith("-----END ")) {
                    continue;

                    /* Parse the original IP address for looking it up in the GeoIP
                     * database and replace it with 127.0.0.1 in the scrubbed
                     * version. */
                } else if (line.startsWith("router ")) {
                    scrubbed = new StringBuilder("router Unnamed 127.0.0.1 " + line.split(" ")[3] + " "
                            + line.split(" ")[4] + " " + line.split(" ")[5] + "\n");

                    /* Parse the publication time and add it to the list of descriptor
                     * publication times to re-write network statuses at the end of
                     * the sanitizing procedure. */
                } else if (line.startsWith("published ")) {
                    published = line.substring("published ".length());
                    this.descriptorPublicationTimes.add(published);
                    scrubbed.append(line + "\n");

                    /* Parse the fingerprint to determine the hashed bridge
                     * identity. */
                } else if (line.startsWith("opt fingerprint ")) {
                    String fingerprint = line
                            .substring(
                                    line.startsWith("opt ") ? "opt fingerprint".length() : "fingerprint".length())
                            .replaceAll(" ", "").toLowerCase();
                    hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(fingerprint.toCharArray()))
                            .toLowerCase();
                    scrubbed.append("opt fingerprint");
                    for (int i = 0; i < hashedBridgeIdentity.length() / 4; i++)
                        scrubbed.append(" " + hashedBridgeIdentity.substring(4 * i, 4 * (i + 1)).toUpperCase());
                    scrubbed.append("\n");

                    /* Replace the contact line (if present) with a generic one. */
                } else if (line.startsWith("contact ")) {
                    scrubbed.append("contact somebody\n");

                    /* When we reach the signature, we're done. Write the sanitized
                     * descriptor to disk below. */
                } else if (line.startsWith("router-signature")) {
                    scrubbedDesc = scrubbed.toString();
                    break;

                    /* Replace extra-info digest with the one we know from our
                     * descriptor mapping (which might be all 0's if we didn't parse
                     * the extra-info descriptor before). */
                } else if (line.startsWith("opt extra-info-digest ")) {
                    scrubbed.append(
                            "opt extra-info-digest " + mapping.extraInfoDescriptorIdentifier.toUpperCase() + "\n");

                    /* Write the following lines unmodified to the sanitized
                     * descriptor. */
                } else if (line.startsWith("reject ") || line.startsWith("accept ") || line.startsWith("platform ")
                        || line.startsWith("opt protocols ") || line.startsWith("uptime ")
                        || line.startsWith("bandwidth ") || line.startsWith("opt hibernating ")
                        || line.equals("opt hidden-service-dir") || line.equals("opt caches-extra-info")
                        || line.equals("opt allow-single-hop-exits")) {
                    scrubbed.append(line + "\n");

                    /* Replace node fingerprints in the family line with their hashes
                     * and nicknames with Unnamed. */
                } else if (line.startsWith("family ")) {
                    StringBuilder familyLine = new StringBuilder("family");
                    for (String s : line.substring(7).split(" ")) {
                        if (s.startsWith("$")) {
                            familyLine.append(" $" + DigestUtils.shaHex(Hex.decodeHex(s.substring(1).toCharArray()))
                                    .toUpperCase());
                        } else {
                            familyLine.append(" Unnamed");
                        }
                    }
                    scrubbed.append(familyLine.toString() + "\n");

                    /* Skip the purpose line that the bridge authority adds to its
                     * cached-descriptors file. */
                } else if (line.startsWith("@purpose ")) {
                    continue;

                    /* Skip all crypto parts that might leak the bridge's identity
                     * fingerprint. */
                } else if (line.startsWith("-----BEGIN ") || line.equals("onion-key")
                        || line.equals("signing-key")) {
                    skipCrypto = true;

                    /* Stop skipping lines when the crypto parts are over. */
                } else if (line.startsWith("-----END ")) {
                    skipCrypto = false;

                    /* If we encounter an unrecognized line, stop parsing and print
                     * out a warning. We might have overlooked sensitive information
                     * that we need to remove or replace for the sanitized descriptor
                     * version. */
                } else {
                    this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
                    return;
                }
            }
            br.close();
        } catch (Exception e) {
            this.logger.log(Level.WARNING, "Could not parse server " + "descriptor.", e);
            return;
        }

        /* Determine new descriptor digest and write it to descriptor
         * mapping. */
        String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
        mapping.serverDescriptorIdentifier = scrubbedHash;

        /* Determine filename of sanitized server descriptor. */
        String dyear = mapping.published.substring(0, 4);
        String dmonth = mapping.published.substring(5, 7);
        File newFile = new File(this.sanitizedBridgesDir + "/" + dyear + "/" + dmonth + "/server-descriptors/" + "/"
                + scrubbedHash.charAt(0) + "/" + scrubbedHash.charAt(1) + "/" + scrubbedHash);

        /* Write sanitized server descriptor to disk, including all its parent
         * directories. */
        try {
            newFile.getParentFile().mkdirs();
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            bw.write(scrubbedDesc);
            bw.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not write sanitized server " + "descriptor to disk.", e);
            return;
        }
    }

    /**
     * Sanitizes an extra-info descriptor and writes it to disk. Looks up
     * the bridge identity hash and publication time in the descriptor
     * mapping. If the corresponding server descriptor was sanitized before,
     * it is re-written to include the new extra-info descriptor digest and
     * the publication time is noted down, too, so that all network statuses
     * possibly referencing this extra-info descriptor and its corresponding
     * server descriptor can be re-written at the end of the sanitizing
     * procedure.
     */
    public void sanitizeAndStoreExtraInfoDescriptor(byte[] data) {

        /* Parse descriptor to generate a sanitized version and to look it up
         * in the descriptor mapping. */
        String scrubbedDesc = null, published = null;
        DescriptorMapping mapping = null;
        try {
            BufferedReader br = new BufferedReader(new StringReader(new String(data, "US-ASCII")));
            String line = null;
            StringBuilder scrubbed = null;
            String hashedBridgeIdentity = null;
            while ((line = br.readLine()) != null) {

                /* When we have parsed both published and fingerprint line, look
                 * up descriptor in the descriptor mapping or create a new one if
                 * there is none. */
                if (mapping == null && published != null && hashedBridgeIdentity != null) {
                    String mappingKey = hashedBridgeIdentity + "," + published;
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentity, published);
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }
                }

                /* Parse bridge identity from extra-info line and replace it with
                 * its hash in the sanitized descriptor. */
                if (line.startsWith("extra-info ")) {
                    hashedBridgeIdentity = DigestUtils.shaHex(Hex.decodeHex(line.split(" ")[2].toCharArray()))
                            .toLowerCase();
                    scrubbed = new StringBuilder("extra-info Unnamed " + hashedBridgeIdentity.toUpperCase() + "\n");

                    /* Parse the publication time and add it to the list of descriptor
                     * publication times to re-write network statuses at the end of
                     * the sanitizing procedure. */
                } else if (line.startsWith("published ")) {
                    scrubbed.append(line + "\n");
                    published = line.substring("published ".length());

                    /* Write the following lines unmodified to the sanitized
                     * descriptor. */
                } else if (line.startsWith("write-history ") || line.startsWith("read-history ")
                        || line.startsWith("geoip-start-time ") || line.startsWith("geoip-client-origins ")
                        || line.startsWith("bridge-stats-end ") || line.startsWith("bridge-ips ")) {
                    scrubbed.append(line + "\n");

                    /* When we reach the signature, we're done. Write the sanitized
                     * descriptor to disk below. */
                } else if (line.startsWith("router-signature")) {
                    scrubbedDesc = scrubbed.toString();
                    break;
                    /* Don't include statistics that should only be contained in relay
                     * extra-info descriptors. */
                } else if (line.startsWith("dirreq-") || line.startsWith("cell-") || line.startsWith("exit-")) {
                    continue;

                    /* If we encounter an unrecognized line, stop parsing and print
                     * out a warning. We might have overlooked sensitive information
                     * that we need to remove or replace for the sanitized descriptor
                     * version. */
                } else {
                    this.logger.fine("Unrecognized line '" + line + "'. Skipping.");
                    return;
                }
            }
            br.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not parse extra-info " + "descriptor.", e);
            return;
        } catch (DecoderException e) {
            this.logger.log(Level.WARNING, "Could not parse extra-info " + "descriptor.", e);
            return;
        }

        /* Determine new descriptor digest and check if write it to descriptor
         * mapping. */
        String scrubbedDescHash = DigestUtils.shaHex(scrubbedDesc);
        boolean extraInfoDescriptorIdentifierHasChanged = !scrubbedDescHash
                .equals(mapping.extraInfoDescriptorIdentifier);
        mapping.extraInfoDescriptorIdentifier = scrubbedDescHash;
        if (extraInfoDescriptorIdentifierHasChanged && !mapping.serverDescriptorIdentifier.equals(NULL_REFERENCE)) {
            this.rewriteServerDescriptor(mapping);
            this.descriptorPublicationTimes.add(published);
        }

        /* Determine filename of sanitized server descriptor. */
        String dyear = mapping.published.substring(0, 4);
        String dmonth = mapping.published.substring(5, 7);
        File newFile = new File(this.sanitizedBridgesDir + "/" + dyear + "/" + dmonth + "/extra-infos/"
                + scrubbedDescHash.charAt(0) + "/" + scrubbedDescHash.charAt(1) + "/" + scrubbedDescHash);

        /* Write sanitized server descriptor to disk, including all its parent
         * directories. */
        try {
            newFile.getParentFile().mkdirs();
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            bw.write(scrubbedDesc);
            bw.close();
        } catch (Exception e) {
            this.logger.log(Level.WARNING, "Could not write sanitized " + "extra-info descriptor to disk.", e);
        }
    }

    public void storeSanitizedNetworkStatus(byte[] data, String published) {
        String scrubbed = null;
        try {
            String ascii = new String(data, "US-ASCII");
            BufferedReader br2 = new BufferedReader(new StringReader(ascii));
            StringBuilder sb = new StringBuilder();
            String line = null;
            while ((line = br2.readLine()) != null) {
                if (line.startsWith("r ")) {
                    String hashedBridgeIdentity = Hex
                            .encodeHexString(Base64.decodeBase64(line.split(" ")[2] + "==")).toLowerCase();
                    String hashedBridgeIdentityBase64 = line.split(" ")[2];
                    String readServerDescId = Hex.encodeHexString(Base64.decodeBase64(line.split(" ")[3] + "=="))
                            .toLowerCase();
                    String descPublished = line.split(" ")[4] + " " + line.split(" ")[5];
                    String mappingKey = (hashedBridgeIdentity + "," + descPublished).toLowerCase();
                    DescriptorMapping mapping = null;
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentity.toLowerCase(), descPublished);
                        mapping.serverDescriptorIdentifier = readServerDescId;
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }
                    String sdi = Base64
                            .encodeBase64String(Hex.decodeHex(mapping.serverDescriptorIdentifier.toCharArray()))
                            .substring(0, 27);
                    String orPort = line.split(" ")[7];
                    String dirPort = line.split(" ")[8];
                    sb.append("r Unnamed " + hashedBridgeIdentityBase64 + " " + sdi + " " + descPublished
                            + " 127.0.0.1 " + orPort + " " + dirPort + "\n");
                } else {
                    sb.append(line + "\n");
                }
            }
            scrubbed = sb.toString();
            br2.close();
        } catch (DecoderException e) {
            this.logger.log(Level.WARNING, "Could not parse server descriptor " + "identifier. This must be a bug.",
                    e);
            return;
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not parse previously " + "sanitized network status.", e);
            return;
        }

        try {
            /* Determine file name. */
            String syear = published.substring(0, 4);
            String smonth = published.substring(5, 7);
            String sday = published.substring(8, 10);
            String stime = published.substring(11, 13) + published.substring(14, 16) + published.substring(17, 19);
            File statusFile = new File(this.sanitizedBridgesDir + "/" + syear + "/" + smonth + "/statuses/" + sday
                    + "/" + syear + smonth + sday + "-" + stime + "-" + "4A0CCD2DDC7995083D73F5D667100C8A5831F16D");

            /* Create all parent directories to write this network status. */
            statusFile.getParentFile().mkdirs();

            /* Write sanitized network status to disk. */
            BufferedWriter bw = new BufferedWriter(new FileWriter(statusFile));
            bw.write(scrubbed);
            bw.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not write previously " + "sanitized network status.", e);
            return;
        }
    }

    public void storeSanitizedServerDescriptor(byte[] data) {
        try {
            String ascii = new String(data, "US-ASCII");
            BufferedReader br2 = new BufferedReader(new StringReader(ascii));
            StringBuilder sb = new StringBuilder();
            String line2 = null, published = null;
            String hashedBridgeIdentity = null;
            DescriptorMapping mapping = null;
            while ((line2 = br2.readLine()) != null) {
                if (mapping == null && published != null && hashedBridgeIdentity != null) {
                    String mappingKey = (hashedBridgeIdentity + "," + published).toLowerCase();
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentity.toLowerCase(), published);
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }
                }
                if (line2.startsWith("router ")) {
                    sb.append("router Unnamed 127.0.0.1 " + line2.split(" ")[3] + " " + line2.split(" ")[4] + " "
                            + line2.split(" ")[5] + "\n");
                } else if (line2.startsWith("published ")) {
                    published = line2.substring("published ".length());
                    sb.append(line2 + "\n");
                    this.descriptorPublicationTimes.add(published);
                } else if (line2.startsWith("opt fingerprint ")) {
                    hashedBridgeIdentity = line2.substring("opt fingerprint".length()).replaceAll(" ", "")
                            .toLowerCase();
                    sb.append(line2 + "\n");
                } else if (line2.startsWith("opt extra-info-digest ")) {
                    sb.append(
                            "opt extra-info-digest " + mapping.extraInfoDescriptorIdentifier.toUpperCase() + "\n");
                } else {
                    sb.append(line2 + "\n");
                }
            }
            br2.close();
            String scrubbedDesc = sb.toString();
            String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);

            mapping.serverDescriptorIdentifier = scrubbedHash;
            String dyear = published.substring(0, 4);
            String dmonth = published.substring(5, 7);
            File newFile = new File(this.sanitizedBridgesDir + "/" + dyear + "/" + dmonth + "/server-descriptors/"
                    + scrubbedHash.substring(0, 1) + "/" + scrubbedHash.substring(1, 2) + "/" + scrubbedHash);
            this.logger.finer("Storing server descriptor " + newFile.getAbsolutePath());
            newFile.getParentFile().mkdirs();
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            bw.write(scrubbedDesc);
            bw.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not store unsanitized server " + "descriptor.", e);
        }
    }

    public void storeSanitizedExtraInfoDescriptor(byte[] data) {
        try {
            String ascii = new String(data, "US-ASCII");
            BufferedReader br2 = new BufferedReader(new StringReader(ascii));
            StringBuilder sb = new StringBuilder();
            String line2 = null, published = null;
            String hashedBridgeIdentity = null;
            DescriptorMapping mapping = null;
            while ((line2 = br2.readLine()) != null) {
                if (mapping == null && published != null && hashedBridgeIdentity != null) {
                    String mappingKey = (hashedBridgeIdentity + "," + published).toLowerCase();
                    if (this.bridgeDescriptorMappings.containsKey(mappingKey)) {
                        mapping = this.bridgeDescriptorMappings.get(mappingKey);
                    } else {
                        mapping = new DescriptorMapping(hashedBridgeIdentity.toLowerCase(), published);
                        this.bridgeDescriptorMappings.put(mappingKey, mapping);
                    }
                }
                if (line2.startsWith("extra-info ")) {
                    hashedBridgeIdentity = line2.split(" ")[2];
                    sb.append("extra-info Unnamed " + hashedBridgeIdentity + "\n");
                } else if (line2.startsWith("published ")) {
                    sb.append(line2 + "\n");
                    published = line2.substring("published ".length());
                    this.descriptorPublicationTimes.add(published);
                } else {
                    sb.append(line2 + "\n");
                }
            }
            br2.close();
            String scrubbedDesc = sb.toString();
            String scrubbedHash = DigestUtils.shaHex(scrubbedDesc);
            mapping.extraInfoDescriptorIdentifier = scrubbedHash;
            String dyear = published.substring(0, 4);
            String dmonth = published.substring(5, 7);
            File newFile = new File(this.sanitizedBridgesDir + "/" + dyear + "/" + dmonth + "/extra-infos/"
                    + scrubbedHash.substring(0, 1) + "/" + scrubbedHash.substring(1, 2) + "/" + scrubbedHash);
            this.logger.finer("Storing extra-info descriptor " + newFile.getAbsolutePath());
            newFile.getParentFile().mkdirs();
            BufferedWriter bw = new BufferedWriter(new FileWriter(newFile));
            bw.write(scrubbedDesc);
            bw.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not store sanitized " + "extra-info descriptor.", e);
        }
    }

    private void rewriteNetworkStatus(File status, String published) {
        try {
            FileInputStream fis = new FileInputStream(status);
            BufferedInputStream bis = new BufferedInputStream(fis);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            int len;
            byte[] data2 = new byte[1024];
            while ((len = bis.read(data2, 0, 1024)) >= 0) {
                baos.write(data2, 0, len);
            }
            fis.close();
            byte[] allData = baos.toByteArray();
            this.storeSanitizedNetworkStatus(allData, published);
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not rewrite network " + "status.", e);
        }
    }

    private void rewriteServerDescriptor(DescriptorMapping mapping) {
        try {
            String dyear = mapping.published.substring(0, 4);
            String dmonth = mapping.published.substring(5, 7);
            File serverDescriptorFile = new File(this.sanitizedBridgesDir + "/" + dyear + "/" + dmonth
                    + "/server-descriptors/" + mapping.serverDescriptorIdentifier.substring(0, 1) + "/"
                    + mapping.serverDescriptorIdentifier.substring(1, 2) + "/"
                    + mapping.serverDescriptorIdentifier);
            FileInputStream fis = new FileInputStream(serverDescriptorFile);
            BufferedInputStream bis = new BufferedInputStream(fis);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            int len;
            byte[] data2 = new byte[1024];
            while ((len = bis.read(data2, 0, 1024)) >= 0) {
                baos.write(data2, 0, len);
            }
            fis.close();
            byte[] allData = baos.toByteArray();
            this.storeSanitizedServerDescriptor(allData);
            serverDescriptorFile.delete();
            this.logger.finer("Deleting server descriptor " + serverDescriptorFile.getAbsolutePath());
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not rewrite server " + "descriptor.", e);
        }
    }

    /**
     * Rewrite all network statuses that might contain references to server
     * descriptors we added or updated in this execution. This applies to
     * all statuses that have been published up to 24 hours after any added
     * or updated server descriptor.
     */
    public void finishWriting() {

        /* Prepare parsing and formatting timestamps. */
        SimpleDateFormat dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        SimpleDateFormat statusFileFormat = new SimpleDateFormat("yyyyMMdd-HHmmss");
        statusFileFormat.setTimeZone(TimeZone.getTimeZone("UTC"));

        /* Iterate over publication timestamps of previously sanitized
         * descriptors. For every publication timestamp, we want to re-write
         * the network statuses that we published up to 24 hours after that
         * descriptor. We keep the timestamp of the last re-written network
         * status in order to make sure we re-writing any network status at
         * most once. */
        String lastDescriptorPublishedPlus24Hours = "1970-01-01 00:00:00";
        for (String published : this.descriptorPublicationTimes) {
            if (published.compareTo(lastDescriptorPublishedPlus24Hours) <= 0) {
                continue;
            }
            // find statuses 24 hours after published
            SortedSet<File> statusesToRewrite = new TreeSet<File>();
            long publishedTime;
            try {
                publishedTime = dateTimeFormat.parse(published).getTime();
            } catch (ParseException e) {
                this.logger.log(Level.WARNING,
                        "Could not parse publication " + "timestamp '" + published + "'. Skipping.", e);
                continue;
            }
            String[] dayOne = dateFormat.format(publishedTime).split("-");

            File publishedDayOne = new File(
                    this.sanitizedBridgesDir + "/" + dayOne[0] + "/" + dayOne[1] + "/statuses/" + dayOne[2]);
            if (publishedDayOne.exists()) {
                statusesToRewrite.addAll(Arrays.asList(publishedDayOne.listFiles()));
            }
            long plus24Hours = publishedTime + 24L * 60L * 60L * 1000L;
            lastDescriptorPublishedPlus24Hours = dateFormat.format(plus24Hours);
            String[] dayTwo = dateFormat.format(plus24Hours).split("-");
            File publishedDayTwo = new File(
                    this.sanitizedBridgesDir + "/" + dayTwo[0] + "/" + dayTwo[1] + "/statuses/" + dayTwo[2]);
            if (publishedDayTwo.exists()) {
                statusesToRewrite.addAll(Arrays.asList(publishedDayTwo.listFiles()));
            }
            for (File status : statusesToRewrite) {
                String statusPublished = status.getName().substring(0, 15);
                long statusTime;
                try {
                    statusTime = statusFileFormat.parse(statusPublished).getTime();
                } catch (ParseException e) {
                    this.logger.log(Level.WARNING, "Could not parse network " + "status publication timestamp '"
                            + published + "'. Skipping.", e);
                    continue;
                }
                if (statusTime < publishedTime || statusTime > plus24Hours) {
                    continue;
                }
                this.rewriteNetworkStatus(status, dateTimeFormat.format(statusTime));
            }
        }

        /* Write descriptor mappings to disk. */
        try {
            BufferedWriter bw = new BufferedWriter(new FileWriter(this.bridgeDescriptorMappingsFile));
            for (DescriptorMapping mapping : this.bridgeDescriptorMappings.values()) {
                bw.write(mapping.toString() + "\n");
            }
            bw.close();
        } catch (IOException e) {
            this.logger.log(Level.WARNING, "Could not write descriptor " + "mappings to disk.", e);
        }
    }
}