// Java tutorial
/**
 * Copyright (c) 2006-2010 Berlin Brown and botnode.com/Berlin Research  All Rights Reserved
 *
 * http://www.opensource.org/licenses/bsd-license.php
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * * Neither the name of the Botnode.com (Berlin Brown) nor
 *   the names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Date: 1/23/2010
 * Description: Social Networking Site Document Analysis
 * Home Page: http://botnode.com/
 *
 * Contact: Berlin Brown <berlin dot brown at gmail.com>
 */
//package org.bresearch.websec.utils.botlist.text;

import java.net.MalformedURLException;
import java.net.URL;

/**
 * Generic text manipulation utilities.
 *
 * @author bbrown
 */
public class TextUtils {

    /** Sentinel returned when the input cannot be reduced to an http/https host. */
    private static final String INVALID_HOSTNAME = "invalid-hostname";

    /**
     * Using the java API URL class, extract the http/https hostname.
     *
     * <p>Only the scheme and the host are kept; everything else in the URL
     * (port, path, query, fragment) is dropped.
     *
     * <p>e.g: {@code http://www.google.com/search} will return
     * {@code http://www.google.com}
     *
     * @param urlStr the URL text to analyse; may be {@code null}
     * @return {@code scheme://host} when {@code urlStr} parses as an http or
     *         https URL with a non-empty host, otherwise the literal string
     *         {@code "invalid-hostname"}
     */
    public String getHTTPHostname(final String urlStr) {
        if (urlStr == null) {
            return INVALID_HOSTNAME;
        }

        final URL url;
        try {
            // NOTE: the URL(String) constructor is deprecated in recent JDKs;
            // it is kept here so the accepted inputs stay the same as before.
            url = new URL(urlStr);
        } catch (MalformedURLException e) {
            // Unparseable input is reported through the sentinel, not an exception.
            return INVALID_HOSTNAME;
        }

        final String scheme = url.getProtocol();
        final boolean isHttp = "http".equalsIgnoreCase(scheme)
                || "https".equalsIgnoreCase(scheme);
        if (!isHttp) {
            return INVALID_HOSTNAME;
        }

        // A URL such as "http:///path" parses successfully but has no host;
        // without this check the result would be the meaningless "http://".
        final String host = url.getHost();
        if (host == null || host.isEmpty()) {
            return INVALID_HOSTNAME;
        }

        return scheme + "://" + host;
    }
}