Java tutorial
/* Copyright 2010 The Tor Project
 * See LICENSE for licensing information */
package org.torproject.ernie.db;

import java.io.*;
import java.text.*;
import java.util.*;
import java.util.logging.*;
import org.apache.commons.codec.digest.*;
import org.apache.commons.codec.binary.*;

/**
 * Parses relay descriptors including network status consensuses and
 * votes, server and extra-info descriptors, and passes the results to the
 * stats handlers, to the archive writer, or to the relay descriptor
 * downloader.
 */
public class RelayDescriptorParser {

  /**
   * Stats file handler that accepts parse results for directory request
   * statistics.
   */
  private DirreqStatsFileHandler dsfh;

  /**
   * Stats file handler that accepts parse results for consensus
   * statistics.
   */
  private ConsensusStatsFileHandler csfh;

  /**
   * Stats file handler that accepts parse results for bridge statistics.
   */
  private BridgeStatsFileHandler bsfh;

  /**
   * File writer that writes descriptor contents to files in a
   * directory-archive directory structure.
   */
  private ArchiveWriter aw;

  /**
   * Missing descriptor downloader that uses the parse results to learn
   * which descriptors we are missing and want to download.
   */
  private RelayDescriptorDownloader rdd;

  /**
   * Relay descriptor database importer that stores relay descriptor
   * contents for later evaluation.
   */
  private RelayDescriptorDatabaseImporter rddi;

  /**
   * Consensus health checker that is handed every parsed consensus and
   * vote.
   */
  private ConsensusHealthChecker chc;

  /**
   * Logger for this class.
   */
  private Logger logger;

  /**
   * UTC date-time format ("yyyy-MM-dd HH:mm:ss") used for parsing
   * interval end times in bandwidth history lines.
   */
  private SimpleDateFormat dateTimeFormat;

  /**
   * Initializes this class. Any of the handlers may be <code>null</code>,
   * in which case the corresponding parse results are not passed on.
   */
  public RelayDescriptorParser(ConsensusStatsFileHandler csfh,
      BridgeStatsFileHandler bsfh, DirreqStatsFileHandler dsfh,
      ArchiveWriter aw, RelayDescriptorDatabaseImporter rddi,
      ConsensusHealthChecker chc) {
    this.csfh = csfh;
    this.bsfh = bsfh;
    this.dsfh = dsfh;
    this.aw = aw;
    this.rddi = rddi;
    this.chc = chc;

    /* Initialize logger. */
    this.logger = Logger.getLogger(RelayDescriptorParser.class.getName());

    this.dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    this.dateTimeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
  }

  /**
   * Sets the relay descriptor downloader that is told about parsed
   * consensuses, votes, server descriptors, and extra-info descriptors.
   */
  public void setRelayDescriptorDownloader(
      RelayDescriptorDownloader rdd) {
    this.rdd = rdd;
  }

  /**
   * Parses a single descriptor and passes the results to all handlers
   * that are not <code>null</code>. The descriptor type is determined
   * from the first line: "network-status-version 3" (consensus or vote),
   * "router " (server descriptor), or "extra-info " (extra-info
   * descriptor). Descriptors of any other type are silently ignored.
   * Descriptors that cannot be parsed are logged and skipped.
   *
   * @param data Raw descriptor bytes.
   */
  public void parse(byte[] data) {
    try {
      /* Convert descriptor to ASCII for parsing. This means we'll lose
       * the non-ASCII chars, but we don't care about them for parsing
       * anyway. */
      BufferedReader br = new BufferedReader(new StringReader(new String(
          data, "US-ASCII")));
      String line = br.readLine();
      if (line == null) {
        this.logger.fine("We were given an empty descriptor for "
            + "parsing. Ignoring.");
        return;
      }
      SimpleDateFormat parseFormat =
          new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
      parseFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
      if (line.equals("network-status-version 3")) {
        this.parseNetworkStatus(data, br, parseFormat);
      } else if (line.startsWith("router ")) {
        this.parseServerDescriptor(data, line, br, parseFormat);
      } else if (line.startsWith("extra-info ")) {
        this.parseExtraInfoDescriptor(data, line, br, parseFormat);
      }
    } catch (IOException e) {
      this.logger.log(Level.WARNING, "Could not parse descriptor. "
          + "Skipping.", e);
    } catch (ParseException e) {
      this.logger.log(Level.WARNING, "Could not parse descriptor. "
          + "Skipping.", e);
    } catch (NumberFormatException e) {
      /* Malformed numbers (ports, uptime, bandwidth, ...) must not
       * propagate to the caller as an unchecked exception; treat them
       * like any other unparseable descriptor. */
      this.logger.log(Level.WARNING, "Could not parse descriptor. "
          + "Skipping.", e);
    }
  }

  /**
   * Parses the remainder of a version 3 network status (consensus or
   * vote) whose first line has already been read from <code>br</code>,
   * and passes the results to the handlers.
   */
  private void parseNetworkStatus(byte[] data, BufferedReader br,
      SimpleDateFormat parseFormat) throws IOException, ParseException {
    // TODO when parsing the current consensus, check the fresh-until
    // time to see when we switch from hourly to half-hourly
    // consensuses
    boolean isConsensus = true;
    int exit = 0, fast = 0, guard = 0, running = 0, stable = 0;
    String validAfterTime = null, nickname = null,
        relayIdentity = null, serverDesc = null, version = null,
        ports = null;
    String fingerprint = null, dirSource = null, address = null;
    long validAfter = -1L, published = -1L, bandwidth = -1L,
        orPort = 0L, dirPort = 0L;
    SortedSet<String> dirSources = new TreeSet<String>();
    SortedSet<String> serverDescriptors = new TreeSet<String>();
    SortedSet<String> hashedRelayIdentities = new TreeSet<String>();
    SortedSet<String> relayFlags = null;
    StringBuilder rawStatusEntry = null;
    String line;
    while ((line = br.readLine()) != null) {
      if (line.equals("vote-status vote")) {
        isConsensus = false;
      } else if (line.startsWith("valid-after ")) {
        validAfterTime = line.substring("valid-after ".length());
        validAfter = parseFormat.parse(validAfterTime).getTime();
      } else if (line.startsWith("dir-source ")) {
        dirSource = line.split(" ")[2];
      } else if (line.startsWith("vote-digest ")) {
        dirSources.add(dirSource);
      } else if (line.startsWith("fingerprint ")) {
        fingerprint = line.split(" ")[1];
      } else if (line.startsWith("r ")) {
        /* A new status entry starts; hand the previous one, if any, to
         * the database importer and reset the per-entry fields. */
        if (isConsensus && relayIdentity != null &&
            this.rddi != null) {
          byte[] rawDescriptor =
              rawStatusEntry.toString().getBytes("US-ASCII");
          this.rddi.addStatusEntry(validAfter, nickname,
              relayIdentity, serverDesc, published, address, orPort,
              dirPort, relayFlags, version, bandwidth, ports,
              rawDescriptor);
          relayFlags = null;
          version = null;
          bandwidth = -1L;
          ports = null;
        }
        rawStatusEntry = new StringBuilder(line + "\n");
        String[] parts = line.split(" ");
        if (parts.length < 9) {
          this.logger.log(Level.WARNING, "Could not parse r line '"
              + line + "' in descriptor. Skipping.");
          /* The previous entry has already been handed over above;
           * forget it so that it is not added a second time, combined
           * with this malformed line, after the loop. */
          relayIdentity = null;
          break;
        }
        String publishedTime = parts[4] + " " + parts[5];
        nickname = parts[1];
        relayIdentity = Hex.encodeHexString(
            Base64.decodeBase64(parts[2] + "=")).toLowerCase();
        serverDesc = Hex.encodeHexString(
            Base64.decodeBase64(parts[3] + "=")).toLowerCase();
        serverDescriptors.add(publishedTime + "," + relayIdentity + ","
            + serverDesc);
        hashedRelayIdentities.add(DigestUtils.shaHex(
            Base64.decodeBase64(parts[2] + "=")).toUpperCase());
        published = parseFormat.parse(publishedTime).getTime();
        address = parts[6];
        orPort = Long.parseLong(parts[7]);
        dirPort = Long.parseLong(parts[8]);
      } else if (rawStatusEntry == null) {
        /* The remaining line types belong to a status entry, but we
         * haven't seen an r line yet. Ignore the line. */
        continue;
      } else if (line.startsWith("s ") || line.equals("s")) {
        rawStatusEntry.append(line + "\n");
        if (line.contains(" Running")) {
          exit += line.contains(" Exit") ? 1 : 0;
          fast += line.contains(" Fast") ? 1 : 0;
          guard += line.contains(" Guard") ? 1 : 0;
          stable += line.contains(" Stable") ? 1 : 0;
          running++;
        }
        relayFlags = new TreeSet<String>();
        if (line.length() > 2) {
          for (String flag : line.substring(2).split(" ")) {
            relayFlags.add(flag);
          }
        }
      } else if (line.startsWith("v ")) {
        rawStatusEntry.append(line + "\n");
        version = line.substring(2);
      } else if (line.startsWith("w ")) {
        rawStatusEntry.append(line + "\n");
        String[] parts = line.split(" ");
        for (String part : parts) {
          if (part.startsWith("Bandwidth=")) {
            bandwidth = Long.parseLong(part.substring(
                "Bandwidth=".length()));
          }
        }
      } else if (line.startsWith("p ")) {
        rawStatusEntry.append(line + "\n");
        ports = line.substring(2);
      }
    }
    if (isConsensus) {
      if (this.rddi != null) {
        this.rddi.addConsensus(validAfter, data);
        /* Hand over the last status entry which was not followed by
         * another r line. */
        if (relayIdentity != null) {
          byte[] rawDescriptor =
              rawStatusEntry.toString().getBytes("US-ASCII");
          this.rddi.addStatusEntry(validAfter, nickname,
              relayIdentity, serverDesc, published, address, orPort,
              dirPort, relayFlags, version, bandwidth, ports,
              rawDescriptor);
        }
      }
      if (this.bsfh != null) {
        for (String hashedRelayIdentity : hashedRelayIdentities) {
          this.bsfh.addHashedRelay(hashedRelayIdentity);
        }
      }
      if (this.csfh != null) {
        this.csfh.addConsensusResults(validAfterTime, exit, fast, guard,
            running, stable);
      }
      if (this.rdd != null) {
        this.rdd.haveParsedConsensus(validAfterTime, dirSources,
            serverDescriptors);
      }
      if (this.aw != null) {
        this.aw.storeConsensus(data, validAfter);
      }
      if (this.chc != null) {
        this.chc.processConsensus(validAfterTime, data);
      }
    } else {
      if (this.rddi != null) {
        this.rddi.addVote(validAfter, dirSource, data);
      }
      if (this.rdd != null) {
        this.rdd.haveParsedVote(validAfterTime, fingerprint,
            serverDescriptors);
      }
      if (this.aw != null) {
        /* The vote digest covers everything from the first line up to
         * and including the first "directory-signature " token. */
        String digest = computeDigest(data, "network-status-version ",
            "directory-signature ");
        if (digest != null) {
          this.aw.storeVote(data, validAfter, dirSource,
              digest.toUpperCase());
        }
      }
      if (this.chc != null) {
        this.chc.processVote(validAfterTime, dirSource, data);
      }
    }
  }

  /**
   * Parses a server descriptor whose first line <code>routerLine</code>
   * has already been read from <code>br</code>, and passes the results
   * to the handlers. Nothing is passed on if the descriptor digest
   * cannot be computed.
   */
  private void parseServerDescriptor(byte[] data, String routerLine,
      BufferedReader br, SimpleDateFormat parseFormat)
      throws IOException, ParseException {
    String[] routerParts = routerLine.split(" ");
    if (routerParts.length < 5) {
      this.logger.log(Level.WARNING, "Could not parse router line '"
          + routerLine + "' in descriptor. Skipping.");
      return;
    }
    String nickname = routerParts[1];
    String address = routerParts[2];
    int orPort = Integer.parseInt(routerParts[3]);
    int dirPort = Integer.parseInt(routerParts[4]);
    String platformLine = null, publishedTime = null,
        bandwidthLine = null, extraInfoDigest = null,
        relayIdentifier = null;
    long published = -1L, uptime = -1L;
    String line;
    while ((line = br.readLine()) != null) {
      if (line.startsWith("platform ")) {
        platformLine = line;
      } else if (line.startsWith("published ")) {
        publishedTime = line.substring("published ".length());
        published = parseFormat.parse(publishedTime).getTime();
      } else if (line.startsWith("opt fingerprint") ||
          line.startsWith("fingerprint")) {
        /* Fingerprints are written in space-separated groups; strip
         * the keyword and the spaces. */
        relayIdentifier = line.substring(line.startsWith("opt ") ?
            "opt fingerprint".length() : "fingerprint".length()).
            replaceAll(" ", "").toLowerCase();
      } else if (line.startsWith("bandwidth ")) {
        bandwidthLine = line;
      } else if (line.startsWith("opt extra-info-digest ") ||
          line.startsWith("extra-info-digest ")) {
        extraInfoDigest = line.startsWith("opt ") ?
            line.split(" ")[2].toLowerCase() :
            line.split(" ")[1].toLowerCase();
      } else if (line.startsWith("uptime ")) {
        uptime = Long.parseLong(line.substring("uptime ".length()));
      }
    }
    String digest = computeDigest(data, "router ",
        "\nrouter-signature\n");
    if (digest == null) {
      return;
    }
    if (this.aw != null) {
      this.aw.storeServerDescriptor(data, digest, published);
    }
    if (this.rdd != null) {
      this.rdd.haveParsedServerDescriptor(publishedTime,
          relayIdentifier, digest, extraInfoDigest);
    }
    if (this.rddi != null) {
      String[] bwParts = bandwidthLine == null ? new String[0]
          : bandwidthLine.split(" ");
      if (bwParts.length < 4 || platformLine == null) {
        /* Without these lines we cannot fill in the bandwidth and
         * platform values, so don't import the descriptor into the
         * database. */
        this.logger.log(Level.WARNING, "Server descriptor " + digest
            + " lacks a valid bandwidth or platform line. Not adding "
            + "it to the database.");
      } else {
        long bandwidthAvg = Long.parseLong(bwParts[1]);
        long bandwidthBurst = Long.parseLong(bwParts[2]);
        long bandwidthObserved = Long.parseLong(bwParts[3]);
        String platform = platformLine.substring("platform ".length());
        this.rddi.addServerDescriptor(digest, nickname, address, orPort,
            dirPort, relayIdentifier, bandwidthAvg, bandwidthBurst,
            bandwidthObserved, platform, published, uptime,
            extraInfoDigest, data);
      }
    }
  }

  /**
   * Parses an extra-info descriptor whose first line
   * <code>extraInfoLine</code> has already been read from
   * <code>br</code>, and passes the results to the handlers. Directory
   * request statistics are passed on while parsing; everything else is
   * only passed on if the descriptor digest can be computed.
   */
  private void parseExtraInfoDescriptor(byte[] data,
      String extraInfoLine, BufferedReader br,
      SimpleDateFormat parseFormat) throws IOException, ParseException {
    String[] extraInfoParts = extraInfoLine.split(" ");
    if (extraInfoParts.length < 3) {
      this.logger.log(Level.WARNING, "Could not parse extra-info line '"
          + extraInfoLine + "' in descriptor. Skipping.");
      return;
    }
    String nickname = extraInfoParts[1];
    String relayIdentifier = extraInfoParts[2];
    String publishedTime = null;
    long published = -1L;
    String statsEnd = null;
    long seconds = -1L;
    SortedMap<String, String> bandwidthHistory =
        new TreeMap<String, String>();
    String line;
    while ((line = br.readLine()) != null) {
      if (line.startsWith("published ")) {
        publishedTime = line.substring("published ".length());
        published = parseFormat.parse(publishedTime).getTime();
      } else if (line.startsWith("read-history ") ||
          line.startsWith("write-history ") ||
          line.startsWith("dirreq-read-history ") ||
          line.startsWith("dirreq-write-history ")) {
        String[] parts = line.split(" ");
        if (parts.length == 6) {
          String type = parts[0];
          String intervalEndTime = parts[1] + " " + parts[2];
          long intervalEnd = dateTimeFormat.parse(intervalEndTime).
              getTime();
          if (Math.abs(published - intervalEnd) >
              7L * 24L * 60L * 60L * 1000L) {
            this.logger.fine("Extra-info descriptor publication time "
                + publishedTime + " and last interval time "
                + intervalEndTime + " in " + type + " line differ by "
                + "more than 7 days! Not adding this line!");
            continue;
          }
          try {
            long intervalLength = Long.parseLong(parts[3].substring(1));
            String[] values = parts[5].split(",");
            /* Walk backwards from the most recent interval, which ends
             * at the time given in the line. */
            for (int i = values.length - 1; i >= 0; i--) {
              /* Only checks that the value is a number. */
              Long.parseLong(values[i]);
              bandwidthHistory.put(intervalEnd + "," + type,
                  intervalEnd + "," + type + "," + values[i]);
              intervalEnd -= intervalLength * 1000L;
            }
          } catch (NumberFormatException e) {
            this.logger.log(Level.WARNING, "Could not parse "
                + line.split(" ")[0] + " line '" + line + "' in "
                + "descriptor. Skipping.", e);
            break;
          }
        }
      } else if (line.startsWith("dirreq-stats-end ")) {
        String[] parts = line.split(" ");
        if (parts.length < 5) {
          this.logger.warning("Could not parse dirreq-stats-end "
              + "line '" + line + "' in descriptor. Skipping.");
          break;
        }
        statsEnd = parts[1] + " " + parts[2];
        seconds = Long.parseLong(parts[3].substring(1));
      } else if (line.startsWith("dirreq-v3-reqs ") &&
          line.length() > "dirreq-v3-reqs ".length()) {
        if (this.dsfh != null) {
          try {
            int allUsers = 0;
            Map<String, String> obs = new HashMap<String, String>();
            String[] parts = line.substring("dirreq-v3-reqs ".
                length()).split(",");
            for (String p : parts) {
              /* Items have the form "cc=num". */
              String country = p.substring(0, 2);
              int users = Integer.parseInt(p.substring(3)) - 4;
              allUsers += users;
              obs.put(country, "" + users);
            }
            /* "zy" holds the sum over all countries. */
            obs.put("zy", "" + allUsers);
            this.dsfh.addObs(relayIdentifier, statsEnd, seconds, obs);
          } catch (NumberFormatException e) {
            this.logger.log(Level.WARNING, "Could not parse "
                + "dirreq-v3-reqs line '" + line + "' in descriptor. "
                + "Skipping.", e);
            break;
          }
        }
      }
    }
    String digest = computeDigest(data, "extra-info ",
        "\nrouter-signature\n");
    if (digest == null) {
      return;
    }
    if (this.aw != null) {
      this.aw.storeExtraInfoDescriptor(data, digest, published);
    }
    if (this.rdd != null) {
      this.rdd.haveParsedExtraInfoDescriptor(publishedTime,
          relayIdentifier.toLowerCase(), digest);
    }
    if (this.rddi != null) {
      this.rddi.addExtraInfoDescriptor(digest, nickname,
          relayIdentifier.toLowerCase(), published, data,
          bandwidthHistory);
    }
  }

  /**
   * Computes the lower-case hex SHA-1 digest of the part of
   * <code>data</code> that starts at the first occurrence of
   * <code>startToken</code> and ends with (and includes) the first
   * occurrence of <code>sigToken</code>.
   *
   * @return The digest, or <code>null</code> if either token is missing
   *     or the signature token does not come after the start token.
   */
  private static String computeDigest(byte[] data, String startToken,
      String sigToken) throws UnsupportedEncodingException {
    /* US-ASCII decoding maps every byte to exactly one char, so string
     * indexes can be used as byte offsets into data. */
    String ascii = new String(data, "US-ASCII");
    int start = ascii.indexOf(startToken);
    int sig = ascii.indexOf(sigToken);
    if (start < 0 || sig <= start) {
      return null;
    }
    int end = sig + sigToken.length();
    byte[] forDigest = new byte[end - start];
    System.arraycopy(data, start, forDigest, 0, end - start);
    return DigestUtils.shaHex(forDigest);
  }
}