Java tutorial
/** * Copyright 2008 - CommonCrawl Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ package org.commoncrawl.io; import static org.junit.Assert.assertTrue; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; import java.net.MalformedURLException; import java.net.URL; import java.net.UnknownHostException; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.Stack; import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.util.StringUtils; import org.commoncrawl.util.IPAddressUtils; import org.commoncrawl.util.MovingAverage; import org.commoncrawl.util.URLUtils; import org.junit.Test; /** * * @author rana * */ public class NIODNSCache { public static class DNSResult { int _ipV4Address; long _ttl; String _cName; DNSResult(int ipAddress, long ipAddressTTL, String cName) { _ipV4Address = ipAddress; _ttl = ipAddressTTL; _cName = cName; } public String getCannonicalName() { return _cName; } public int getIPAddress() { return _ipV4Address; } public long getTTL() { return _ttl; } } public static interface LoadFilter { boolean loadItem(String hostName, String ipAddress, String cName, long expireTime, long lastTouchedTime); String validateName(String hostName); } public static class Node { private static final int GROWTH_FACTOR = 1; // Like public version, but without range checks. private static int binarySearch(Node[] a, int fromIndex, int toIndex, char key) { int low = fromIndex; int high = toIndex - 1; while (low <= high) { int mid = (low + high) >>> 1; Node midVal = a[mid]; int cmp = midVal.compareTo(key); if (cmp < 0) low = mid + 1; else if (cmp > 0) high = mid - 1; else return mid; // key found } return -(low + 1); // key not found. } private char _nodeChar; private char _nodeCharArray[]; private long _expireTime = 0; private int _flags = 0; private int _count = 0; private Object _children; private int _ipAddress = 0; private String _cannonicalName = null; private Node _parent = null; /* * private Object _metadata = null; */ private long _lastTouched = -1; public static final short Flag_Is_RootNode = 1 << 0; public static final short Flag_Is_TerminalNode = 1 << 1; public static final short Flag_Is_SuperNode = 1 << 2; public static final short Flag_Is_MultiCharNode = 1 << 3; public static final short Flag_NEXT_AVAILABLE_BIT_POS = 4; private Node() { _parent = null; _flags = Flag_Is_RootNode; } public Node(Node parent, char nodeChar, int flags) { numberOfNodesChildEQ1++; _nodeChar = nodeChar; _flags = (short) flags; _parent = parent; } public final void clearFlag(short flag) { _flags &= ~flag; } public final int compareTo(char c) { if (_nodeChar < c) return -1; else if (_nodeChar > c) return 1; else return 0; } public final Node findOrAddChild(NIODNSCache cacheObject, char nodeChar, boolean addChild) { int itemPosition = -1; if (_count == 1) { if (((Node) _children).getNodeChar() == nodeChar) { itemPosition = 0; } else if (((Node) _children).getNodeChar() < nodeChar) { itemPosition = -2; } } else if (_count > 1) { itemPosition = binarySearch((Node[]) _children, 0, _count, nodeChar); } // lt 0 means item was not found ... if (itemPosition < 0 && addChild) { // allocate a new Node Node newNode = new Node(this, nodeChar, 0); // increment stats ... cacheObject.numberOfNodes++; // normalize item positoin ... itemPosition = Math.abs(itemPosition + 1); // check some bounding conditions. if (_count == 0) { _children = newNode; } else { // get some basic bounding information established ... int leftCopyItems = itemPosition; int rightCopyItems = _count - itemPosition; Node copyArray[] = (_count == 1) ? null : (Node[]) _children; // if the array is full ... if (_count == 1 || _count == ((Node[]) _children).length) { if (_count == 1) { numberOfNodesChildEQ1--; numberOfNodesChildLTEQ4++; } else if (_count == 4) { numberOfNodesChildLTEQ4--; numberOfNodesChildLTEQ8++; } else if (_count == 8) { numberOfNodesChildLTEQ8--; numberOfNodesChildGT8++; } // allocate a new array int growAmount = ((_count / GROWTH_FACTOR) + 1) * GROWTH_FACTOR; copyArray = new Node[growAmount]; } // copy lbound items only if copying into new array ... if (leftCopyItems != 0 && copyArray != null) { if (_count == 1) copyArray[0] = (Node) _children; else System.arraycopy(_children, 0, copyArray, 0, leftCopyItems); } // copy rbound items no matter what ... if (rightCopyItems != 0) { if (_count == 1) copyArray[1] = (Node) _children; else System.arraycopy(_children, itemPosition, copyArray, itemPosition + 1, rightCopyItems); } // and assign copy array to children _children = copyArray; // insert new item into array ... ((Node[]) _children)[itemPosition] = newNode; } // increment count _count++; } if (itemPosition >= 0) { return (_count == 1) ? (Node) _children : ((Node[]) _children)[itemPosition]; } return null; } public String getCannonicalName() { return _cannonicalName; } public final Node getChildAt(int index) { return (_count == 1) ? (Node) _children : ((Node[]) _children)[index]; } public final int getChildCount() { return _count; } public String getFullName() { StringBuffer nameOut = new StringBuffer(); Node currentNode = this; while (!currentNode.isRootNode()) { if (currentNode.isMultiCharNode()) { nameOut.append(currentNode.getMultiCharArray()); } nameOut.append(currentNode.getNodeChar()); currentNode = currentNode.getParentNode(); } return nameOut.toString(); } public final int getIPAddress() { return _ipAddress; } /** get last touched time **/ public final long getLastTouchedTime() { return _lastTouched; } public final char[] getMultiCharArray() { return _nodeCharArray; } public final char getNodeChar() { return _nodeChar; } public Node getParentNode() { return _parent; } public final long getTimeToLive() { return _expireTime; } public final boolean isFlagSet(short flag) { return (_flags & flag) != 0; } public final boolean isMultiCharNode() { return (_flags & Flag_Is_MultiCharNode) != 0; } public boolean isRootNode() { return (_flags & Flag_Is_RootNode) != 0; } public final boolean isSuperNode() { return (_flags & Flag_Is_SuperNode) != 0; } public final boolean isTerminalNode() { return (_flags & Flag_Is_TerminalNode) != 0; } public void markAsMultiCharNode(char s[], int startOffset, int length) { if (length == 0) { throw new RuntimeException(); } _flags |= Flag_Is_MultiCharNode; _nodeCharArray = new char[length]; System.arraycopy(s, startOffset, _nodeCharArray, 0, length); } /* * public final void setMetadata(Object metadata) { _metadata = metadata; } * * public final Object getMetadata() { return _metadata; } */ public final void markAsSuperNode() { _flags |= Flag_Is_SuperNode; } public final void markAsTerminalNode() { _flags |= Flag_Is_TerminalNode; } public final void removeChildAt(int index) { if (index >= _count) throw new RuntimeException("Invalid Index"); if (_count == 1) { _children = null; } else { int rightOfIndexCount = _count - (index + 1); if (rightOfIndexCount > 0) { if (_count > 2) System.arraycopy(_children, index + 1, _children, index, rightOfIndexCount); else _children = ((Node[]) _children)[1]; } else { if (_count == 2) { _children = ((Node[]) _children)[0]; } } } _count--; } public final void setFlag(short flag) { _flags |= flag; } public final void setIPAddress(int address) { _ipAddress = address; } /** update last touched time **/ public final void setLastTouchedTime(long timeInMilliseconds) { _lastTouched = timeInMilliseconds; } public final void setTimeToLive(long ttl) { _expireTime = ttl; } public Node splitMultiCharNodeAt(NIODNSCache cacheObject, int splitIdx) { // allocate new intermediate node Node newIntermediateNode = new Node(this, _nodeCharArray[splitIdx], 0); // increment stats ... cacheObject.numberOfNodes++; newIntermediateNode._ipAddress = this._ipAddress; // newOuterNode._metadata = this._metadata; newIntermediateNode._expireTime = this._expireTime; newIntermediateNode._cannonicalName = this._cannonicalName; this._ipAddress = 0; // this._metadata = null; newIntermediateNode._flags = (short) (this._flags & ~Flag_Is_MultiCharNode); this._flags = 0; this._cannonicalName = null; int leftOfSplitLength = splitIdx; int rightOfSplitLength = _nodeCharArray.length - splitIdx - 1; if (leftOfSplitLength != 0) { newIntermediateNode.markAsMultiCharNode(_nodeCharArray, 0, leftOfSplitLength); } if (rightOfSplitLength != 0) { char array[] = new char[rightOfSplitLength]; System.arraycopy(_nodeCharArray, splitIdx + 1, array, 0, rightOfSplitLength); _nodeCharArray = array; _flags |= Flag_Is_MultiCharNode; } else { _nodeCharArray = null; _flags &= ~Flag_Is_MultiCharNode; } // absorb the old node's children ... newIntermediateNode._children = this._children; newIntermediateNode._count = this._count; // and re-parent our children to point to new intermediate branch node for (int i = 0; i < newIntermediateNode.getChildCount(); ++i) { newIntermediateNode.getChildAt(i)._parent = newIntermediateNode; } // make the new node the old node's only child ... this._children = newIntermediateNode; this._count = 1; return newIntermediateNode; } } public static interface NodeDumpFilter { boolean dumpTerminalNode(Node node); } private static final class TreePosition { private Node _node; private int _index; TreePosition(Node node, int index) { _node = node; _index = index; } public final int getIndex() { return _index; } public final Node getNode() { return _node; } public final Node resolve() { if (_index < _node.getChildCount()) { return _node.getChildAt(_index); } return null; } public final void setIndex(int index) { _index = index; } } private static final Log LOG = LogFactory.getLog(NIODNSCache.class); private Node _root = new Node(); private Node _ipRoot = new Node(); private Stack<TreePosition> _iterationStack = new Stack<TreePosition>(); private TreeSet<String> _cannonicalNames = new TreeSet<String>(); private long numberOfNodes = 0; private long numberOfNodesRemoved = 0; private boolean enableIPAddressTracking = false; public MovingAverage _dnsAddToCacheTime = new MovingAverage(25); public MovingAverage _dnsLookupFromCacheTime = new MovingAverage(25); private static final int searchMode_NodeChar = 1; private static final int searchMode_MultiNodeChar = 2; static int numberOfNodesChildEQ1 = 0; static int numberOfNodesChildLTEQ4 = 0; static int numberOfNodesChildLTEQ8 = 0; static int numberOfNodesChildGT8 = 0; public static void main(String[] args) { File file = new File(args[0]); InputStream stream; NIODNSCache dnsCache = new NIODNSCache(); try { stream = new FileInputStream(file); long timeStart = System.currentTimeMillis(); LOG.info("Loading Tree from Stream"); dnsCache.loadTree(stream, null); long timeEnd = System.currentTimeMillis(); LOG.info("Load took:" + (timeEnd - timeStart) + " NodeCount:" + dnsCache.getActiveNodeCount()); LOG.info("Pruning cache based on filter (TTL)"); timeStart = System.currentTimeMillis(); ByteArrayOutputStream streamOut = new ByteArrayOutputStream(1024 * 1024 * 100); dnsCache.dumpNameTree(streamOut, new NIODNSCache.NodeDumpFilter() { @Override public boolean dumpTerminalNode(Node node) { if (node.getTimeToLive() >= System.currentTimeMillis()) { return true; } return false; } }); timeEnd = System.currentTimeMillis(); LOG.info("Dump took:" + (timeEnd - timeStart)); LOG.info("Reloading cache from stream"); timeStart = System.currentTimeMillis(); dnsCache = new NIODNSCache(); dnsCache.loadTree(new ByteArrayInputStream(streamOut.toByteArray()), null); timeEnd = System.currentTimeMillis(); LOG.info("Reload took:" + (timeEnd - timeStart) + "NodeCount:" + dnsCache.getActiveNodeCount()); /* * * LOG.info("Collecting terminal IP nodes"); List<Node> terminalIPNodes = * new Vector<Node>(); dnsCache.collectTerminalNodes(dnsCache._ipRoot, * terminalIPNodes); LOG.info("Sorting nodes"); * Collections.sort(terminalIPNodes,new Comparator<NIODNSCache.Node>() { * * @Override public int compare(Node o1, Node o2) { return * o1.getTimeToLive() > o2.getTimeToLive() ? -1 : o1.getTimeToLive() < * o2.getTimeToLive() ? 1 : 0; } }); LOG.info("Top 1000 Hot Nodes"); int * maxNodes = Math.min(1000, terminalIPNodes.size()); for (int * i=0;i<maxNodes;++i) { LOG.info("Node:" + * terminalIPNodes.get(i).getFullName() + " CName:" + * terminalIPNodes.get(i).getCannonicalName() + " HitCount:" + * terminalIPNodes.get(i).getTimeToLive()); } */ /* * LOG.info("Dumping IP Tree to stdout"); * dnsCache.dumpIPAddressTree(System.out); * * LOG.info("Dumping Name Tree to stdout"); * dnsCache.dumpNameTree(System.out); */ } catch (IOException e) { e.printStackTrace(); } } public NIODNSCache() { _iterationStack.ensureCapacity(1024); } private synchronized Node _findNode(Node rootNode, String nodeName) { String s = nodeName.toLowerCase(); Node node = rootNode; if (s.length() > 0) { Node lastSubTerminalNode = null; for (int i = s.length() - 1; i >= 0 && node != null; i--) { if (s.charAt(i) == '.' && node != _root) { lastSubTerminalNode = node; } node = node.findOrAddChild(this, s.charAt(i), false); // now if this is a multi-char node ... if (node != null && node.isMultiCharNode()) { int innerScanPos = i - 1; int multiCharArrayScanPos = node.getMultiCharArray().length - 1; while (innerScanPos >= 0 && multiCharArrayScanPos >= 0) { if (s.charAt(innerScanPos) == node.getMultiCharArray()[multiCharArrayScanPos]) { innerScanPos--; multiCharArrayScanPos--; } else { break; } } // now first condition for a successfull match is than we completely // scanned the mutli-char array ... if (multiCharArrayScanPos == -1) { // now if char scan also reached the beginning of the search string // ... if (innerScanPos == -1) { // this is a true match ... return (node.isTerminalNode()) ? node : null; } // otherwise ... check the character at new position ... else { // set i to inner scan pos + 1 (so that out loop will properly // adjust value) i = innerScanPos + 1; } } else { node = null; break; } } } if (node == null && lastSubTerminalNode != null) { if (lastSubTerminalNode.isSuperNode()) { node = lastSubTerminalNode; } } if (node != null) return (node.isTerminalNode()) ? node : null; } return null; } private void addIPToNameNode(int ipAddress, String hostName) { String ipAddressStr = IPAddressUtils.IntegerToIPAddressString(ipAddress); ; // create or access ip address node ... Node ipAddressNode = addNode(_ipRoot, ipAddressStr); // increment hit count ipAddressNode._expireTime++; // get previous cname for ip address node String previousHostName = ipAddressNode._cannonicalName; if (previousHostName == null) { setCannonicalNameForNode(ipAddressNode, hostName); } else { // if not equal .... if (previousHostName.compareTo(hostName) != 0) { // find common denominator ... String previousTLDName = URLUtils.extractRootDomainName(previousHostName); String currentTLDName = URLUtils.extractRootDomainName(hostName); // if tld name lengths match ... if (previousTLDName != null && currentTLDName != null && previousTLDName.length() == currentTLDName.length()) { // and if previous entry is NOT the TLD Name entry .. . if (previousTLDName.length() != previousHostName.length()) { // ok time to do a comparison of tld names if (previousTLDName.compareTo(currentTLDName) == 0) { // if new name is a tld name ... if (currentTLDName.length() == hostName.length()) { // use current tld name ... setCannonicalNameForNode(ipAddressNode, hostName); } else { // now extract super domain name String previousSubDomainNameParts[] = previousHostName .substring(0, previousHostName.length() - previousTLDName.length()) .split("\\."); String newSubDomainNameParts[] = hostName .substring(0, hostName.length() - currentTLDName.length()).split("\\."); int partsToCompare = Math.min(previousSubDomainNameParts.length, newSubDomainNameParts.length); int i = 0; for (i = 0; i < partsToCompare; ++i) { if (previousSubDomainNameParts[previousSubDomainNameParts.length - (i + 1)] .compareTo(newSubDomainNameParts[newSubDomainNameParts.length - (i + 1)]) != 0) break; } if (i == 0) { setCannonicalNameForNode(ipAddressNode, previousTLDName); } else if (i < previousSubDomainNameParts.length) { StringBuffer builder = new StringBuffer(); for (int j = i; j < previousSubDomainNameParts.length; ++j) { builder.append(previousSubDomainNameParts[j]); builder.append("."); } builder.append(previousTLDName); setCannonicalNameForNode(ipAddressNode, builder.toString()); } } } } } } } } /** add a name node **/ public Node addNameNode(String nodeName) { return addNode(_root, nodeName); } /** underlying routine shared by name and ip lookup routines **/ private Node addNode(Node rootNode, String path) { // search super node in reverse order ... String s = path.toLowerCase(); Node node = rootNode; if (s.length() > 0) { int multiNodeIdx = -1; int searchMode = searchMode_NodeChar; for (int i = s.length() - 1; i >= 0; i--) { if (searchMode == searchMode_NodeChar) { // find the node for the next character ... node = node.findOrAddChild(this, s.charAt(i), true); // update/set the node's ttl // node.setTimeToLive(Math.max(node.getTimeToLive(),ttl)); // if the returned node is a multi-char node, search into the node ... if (node.isMultiCharNode()) { searchMode = searchMode_MultiNodeChar; multiNodeIdx = node.getMultiCharArray().length - 1; } // otherwise... if the node has no children and the index is not zero // and current char is not '.' token ... else if (node.getChildCount() == 0 && i != 0 && s.charAt(i) != '.' && !node.isTerminalNode()) { int multiNodeScanStart = i - 1; int multiNodeCharEndPos = multiNodeScanStart; // walk backwards until either end of string is reached, or a '.' // token is located ... while (multiNodeCharEndPos >= 0 && s.charAt(multiNodeCharEndPos) != '.') multiNodeCharEndPos--; // if we actually accumulated something in the scan buffer ... if (multiNodeScanStart - multiNodeCharEndPos != 0) { // if scan terminated before the end of the string, then a '.' // token terminated the scan ... if (multiNodeCharEndPos != -1) { // gobble up the appropriate number of characters ... node.markAsMultiCharNode(s.toCharArray(), multiNodeCharEndPos + 1, i - (multiNodeCharEndPos + 1)); // set up i so that loop can continue ... i = multiNodeCharEndPos + 1; // and break out of multi-char scan mode ... searchMode = searchMode_NodeChar; } else { // gobble up remaining characters and covert node to multi-char // node ... node.markAsMultiCharNode(s.toCharArray(), 0, i); // and break out of loop... break; } } } } else { if (multiNodeIdx == -1 || i == -1) { throw new RuntimeException(); } if (node.getMultiCharArray()[multiNodeIdx] != s.charAt(i)) { node.splitMultiCharNodeAt(this, multiNodeIdx); i += 1; searchMode = searchMode_NodeChar; } else { multiNodeIdx -= 1; if (multiNodeIdx < 0) searchMode = searchMode_NodeChar; } } } // finally, if we are still in multi-char search mode and multiNodeIdx != // -1 // this means that we terminated a successfull match int the MIDDLE of a // multi-char node if (searchMode == searchMode_MultiNodeChar && multiNodeIdx != -1) { // in this case, we have to split the multi-char node appropriately ... node.splitMultiCharNodeAt(this, multiNodeIdx); } node.markAsTerminalNode(); return node; } return null; } public synchronized Node cacheIPAddressForHost(String hostName, int ipAddress, long ttl, String cName) { long startTime = System.currentTimeMillis(); // first add the node to the list ... Node node = addNameNode(hostName); node.setIPAddress(ipAddress); node.setTimeToLive(ttl); node.setLastTouchedTime(System.currentTimeMillis()); if (cName != null) { setCannonicalNameForNode(node, cName); Node cannonicalNameNode = addNameNode(cName); if (cannonicalNameNode != node && cannonicalNameNode.getTimeToLive() < node.getTimeToLive()) { cannonicalNameNode.setIPAddress(ipAddress); cannonicalNameNode.setTimeToLive(ttl); } } if (enableIPAddressTracking) { addIPToNameNode(ipAddress, (cName != null) ? cName : hostName); } long endTime = System.currentTimeMillis(); _dnsAddToCacheTime.addSample((endTime - startTime)); return node; } public synchronized void clear() { _root = new Node(); _ipRoot = new Node(); _cannonicalNames.clear(); _iterationStack.clear(); numberOfNodes = 0; numberOfNodesRemoved = 0; } public synchronized void collectTerminalIPNodes(List<Node> terminalNodeVector) { try { collectTerminalNodes(_ipRoot, terminalNodeVector); } catch (IOException e) { } } public synchronized void collectTerminalNodes(List<Node> terminalNodeVector) { try { collectTerminalNodes(_root, terminalNodeVector); } catch (IOException e) { } } private synchronized void collectTerminalNodes(Node node, List<Node> terminalNodeVector) throws IOException { if (node.isTerminalNode()) { terminalNodeVector.add(node); } for (int i = 0; i < node.getChildCount(); ++i) { collectTerminalNodes(node.getChildAt(i), terminalNodeVector); } } public synchronized void dumpIPAddressTree(OutputStream outputStream) throws IOException { PrintWriter printWriter = new PrintWriter(new BufferedOutputStream(outputStream)); dumpIPNode(printWriter, _ipRoot); } void dumpIPNode(PrintWriter printWriter, Node node) throws IOException { if (node.isTerminalNode()) { printWriter.append(node.getFullName()); printWriter.append(","); printWriter.append(node.getCannonicalName()); printWriter.append("\n"); } for (int i = 0; i < node.getChildCount(); ++i) { dumpIPNode(printWriter, node.getChildAt(i)); } } public synchronized void dumpNameTree(OutputStream outputStream, NodeDumpFilter filter) throws IOException { PrintWriter printWriter = new PrintWriter(new BufferedOutputStream(outputStream)); dumpNode(printWriter, _root, filter); } void dumpNode(PrintWriter printWriter, Node node, NodeDumpFilter filter) throws IOException { if (node.isTerminalNode()) { if (filter == null || filter.dumpTerminalNode(node)) { printWriter.append(node.getFullName()); printWriter.append(","); try { printWriter.append((IPAddressUtils.IntegerToInetAddress(node.getIPAddress()).toString())); } catch (UnknownHostException e) { LOG.error(StringUtils.stringifyException(e)); } printWriter.append(","); printWriter.append(Long.toString(node.getTimeToLive())); printWriter.append(","); if (node._cannonicalName != null) { printWriter.append(node._cannonicalName); } else { printWriter.append("null"); } printWriter.append(","); printWriter.append(Long.toString(node.getLastTouchedTime())); printWriter.append("\n"); } } for (int i = 0; i < node.getChildCount(); ++i) { dumpNode(printWriter, node.getChildAt(i), filter); } } private synchronized final void dumpStats() { LOG.info("Nodes Count:" + numberOfNodes); LOG.info("Nodes Removed:" + numberOfNodesRemoved); } public synchronized void enableIPAddressTracking() { enableIPAddressTracking = true; } public Node findNode(String nodeName) { Node nodeOut = _findNode(_root, nodeName); if (nodeOut != null && nodeOut.isTerminalNode()) { nodeOut.setLastTouchedTime(System.currentTimeMillis()); } return nodeOut; } public synchronized long getActiveNodeCount() { return numberOfNodes - numberOfNodesRemoved; } public synchronized DNSResult getIPAddressForHost(String hostName) { long startTime = System.currentTimeMillis(); DNSResult result = null; Node resolvedNode = findNode(hostName); String cName = null; int ipAddress = 0; long ttl = 0; if (resolvedNode != null) { if (resolvedNode.getIPAddress() != 0) { ipAddress = resolvedNode.getIPAddress(); ttl = resolvedNode.getTimeToLive(); cName = resolvedNode.getCannonicalName(); } // if cannonical name is not null and our current address has expired ... if (cName != null && ttl <= System.currentTimeMillis()) { Node cannonicalNode = findNode(cName); // if cannonical node exists and it's ttl > our ttl if (cannonicalNode != null && cannonicalNode.getTimeToLive() > resolvedNode.getTimeToLive()) { ipAddress = cannonicalNode.getIPAddress(); ttl = cannonicalNode.getTimeToLive(); resolvedNode.setIPAddress(ipAddress); resolvedNode.setTimeToLive(ttl); } } } if (ipAddress != 0) { result = new DNSResult(ipAddress, ttl, cName); } long endTime = System.currentTimeMillis(); _dnsLookupFromCacheTime.addSample((endTime - startTime)); return result; } public synchronized void loadTree(InputStream inputStream, LoadFilter loadFilter) throws IOException { int lineCount = 0; clear(); BufferedReader lineReader = new BufferedReader(new InputStreamReader(inputStream), 1024000); String line = null; while ((line = lineReader.readLine()) != null) { try { String elements[] = line.split(","); if (elements.length >= 4) { String hostName = elements[0]; String ipAddress = elements[1].substring(1); long ttl = Long.parseLong(elements[2]); String cname = null; if (!elements[3].equals("null")) { cname = elements[3]; } long lastTouched = -1; if (elements.length > 5) { lastTouched = Long.parseLong(elements[4]); } int ipAddressInteger = IPAddressUtils.IPV4AddressStrToInteger(ipAddress); if (loadFilter != null) { hostName = loadFilter.validateName(hostName); } if (loadFilter == null || loadFilter.loadItem(hostName, ipAddress, cname, ttl, lastTouched)) { cacheIPAddressForHost(hostName, ipAddressInteger, ttl, cname); } } } catch (Exception e) { LOG.error(StringUtils.stringifyException(e)); } if (++lineCount % 100000 == 0) LOG.info("Processed " + lineCount + " lines"); } LOG.info("Processed a total of:" + lineCount + " lines"); } private synchronized void pruneCache() { int nodesIterated = 0; // walk the tree ... if (_iterationStack.size() == 0) { // System.out.println("Starting Iteration From Top"); Node activeNode = (_root.getChildCount() != 0) ? _root.getChildAt(0) : null; if (activeNode != null) { _iterationStack.add(new TreePosition(_root, 0)); } } else { // System.out.println("Continuing Previous Iteration"); } long startTime = System.currentTimeMillis(); while (_iterationStack.size() != 0) { TreePosition pos = _iterationStack.peek(); // resolve the next child for the current node .. may return null... Node currentNode = pos.resolve(); if (currentNode != null && currentNode.getChildCount() != 0) { _iterationStack.push(new TreePosition(currentNode, 0)); } else { // if no more children for the current parent node, operate on the // parent node itself ... if (currentNode == null) { currentNode = pos.getNode(); if (currentNode == _root) break; _iterationStack.pop(); pos = _iterationStack.peek(); } nodesIterated++; if (currentNode.getTimeToLive() <= startTime && currentNode.getChildCount() == 0) { // increment stats ... numberOfNodesRemoved++; // remove child from parent ... pos.getNode().removeChildAt(pos.getIndex()); // check to see if time is up ... if (System.currentTimeMillis() - startTime > 100) { LOG.info("Time Up. breaking out. Nodes Iterated:" + nodesIterated); dumpStats(); LOG.info("Prune Took:" + (System.currentTimeMillis() - startTime) + " MS"); return; } } else { pos.setIndex(pos.getIndex() + 1); } if (pos.getIndex() >= pos.getNode().getChildCount()) { _iterationStack.pop(); if (_iterationStack.size() != 0) { // advance parent's index ... TreePosition parentPos = _iterationStack.peek(); parentPos.setIndex(parentPos.getIndex() + 1); } } } } long timeAfterPrune = System.currentTimeMillis(); _iterationStack.removeAllElements(); LOG.info("Prune Finished Iterating Whole Tree"); dumpStats(); LOG.info("Prune Took:" + (timeAfterPrune - startTime) + " MS"); } private void setCannonicalNameForNode(Node node, String cName) { SortedSet<String> subset = _cannonicalNames.subSet(cName, cName + "\0"); if (!subset.isEmpty()) { node._cannonicalName = subset.first(); } else { node._cannonicalName = cName; _cannonicalNames.add(cName); } } @Test public void simpleTest() throws Exception { int ipAddress = IPAddressUtils.IPV4AddressStrToInteger("68.178.211.35"); cacheIPAddressForHost("www.matlockpark.com", ipAddress, 1226959057707L, "matlockpark.com"); DNSResult result = getIPAddressForHost("www.matlockpark.com"); org.junit.Assert.assertTrue(result.getCannonicalName().equals("matlockpark.com")); org.junit.Assert.assertTrue(result.getIPAddress() == ipAddress); org.junit.Assert.assertTrue(result.getTTL() == 1226959057707L); } @Test public void validateDumpCode() throws Exception { addNameNode("www.feeds.feedburner.com"); addNameNode("pictures.google.com"); addNameNode("pictures2.google.com"); addNameNode("gmail.google.com"); addNameNode("foobar.google.com"); assertTrue(findNode("feeds.feedburner.com") == null); assertTrue(findNode("www.feeds.feedburner.com") != null); assertTrue(findNode("pictures.google.com") != null); assertTrue(findNode("pictures2.google.com") != null); assertTrue(findNode("gmail.google.com") != null); assertTrue(findNode("foobar.google.com") != null); dumpNameTree(System.out, null); } @Test public void validateTrieCode() throws Exception { // addNode("www.google.com",System.currentTimeMillis() + // 60000).setMetadata("www.google.com"); // addNode("pictures.google.com",System.currentTimeMillis() + // 60000).setMetadata("pictures.google.com"); // addNode("pictures2.google.com",System.currentTimeMillis() + // 60000).setMetadata("pictures2.google.com"); // addNode("gmail.google.com",System.currentTimeMillis() + // 60000).setMetadata("gmail.google.com"); // addNode("foobar.google.com",System.currentTimeMillis() + // 60000).setMetadata("foobar.google.com"); // Node googleSuperNode = addNode("google.com",System.currentTimeMillis() + // 60000); // googleSuperNode.markAsSuperHost(); // googleSuperNode.setMetadata("google.com"); // Node wwwGoogleCom = findNode("www.google.com"); // Node productsGoogleCom = findNode("products.google.com"); // org.junit.Assert.assertTrue(wwwGoogleCom.getMetadata().equals("www.google.com")); // org.junit.Assert.assertTrue(productsGoogleCom.getMetadata().equals("google.com")); URL resourceURL = ClassLoader.getSystemResource("urls.txt"); if (resourceURL == null) { throw new FileNotFoundException(); } Set<String> hostSet = new HashSet<String>(); for (int pass = 0; pass < 1; ++pass) { System.out.println("running pass:" + pass); InputStream stream = resourceURL.openStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(stream)); String line; int lineCount = 0; while ((line = reader.readLine()) != null) { try { URL url = new URL(line); if (url.getHost() != null && url.getHost().length() != 0) { if (pass == 0) { // hostSet.add(url.getHost().toLowerCase()); cacheIPAddressForHost(url.getHost(), url.getHost().hashCode(), System.currentTimeMillis() + 5000 + (int) (Math.random() * 30000.00), Integer.toString(url.getHost().hashCode())); } else { Node node = findNode(url.getHost()); if (node != null) { if (node.getIPAddress() != url.getHost().hashCode()) { throw new RuntimeException( "Metadata Mismatch for host:" + url.getHost() + ".Excpected:" + url.getHost().hashCode() + " Got: " + node.getIPAddress()); } } else { throw new RuntimeException("Node Null! Excpected:" + url.getHost()); } } } if (++lineCount % 10000 == 0) { System.out.println("pruning mid-stream"); pruneCache(); System.out.println("sleeping a little while .."); Thread.sleep(100); System.out.println("wokeup"); // System.out.print("."); } } catch (MalformedURLException e) { } catch (Exception e) { e.printStackTrace(); } } System.out.println("done reading entries"); } System.out.println("Host Count:" + hostSet.size()); System.out.println("Number of Nodes:" + numberOfNodes); System.out.println("Number of Nodes EQ_1:" + numberOfNodesChildEQ1); System.out.println("Number of Nodes LTEQ_4:" + numberOfNodesChildLTEQ4); System.out.println("Number of Nodes LTEQ_8:" + numberOfNodesChildLTEQ8); System.out.println("Number of Nodes GT_8:" + numberOfNodesChildGT8); while (true) { System.out.println("sleeping..."); Thread.sleep(100); System.out.println("pruning.. "); pruneCache(); } } }