Java tutorial
/* This program is a part of the companion code for Core Java 8th ed. (http://horstmann.com/corejava) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * This program displays all URLs in a web page by matching a regular expression that describes the * <a href=...> HTML tag. Start the program as <br> * java HrefMatch URL * @version 1.01 2004-06-04 * @author Cay Horstmann */ public class HrefMatch { public static void main(String[] args) { try { // get URL string from command line or use default String urlString; if (args.length > 0) urlString = args[0]; else urlString = "http://java.sun.com"; // open reader for URL InputStreamReader in = new InputStreamReader(new URL(urlString).openStream()); // read contents into string builder StringBuilder input = new StringBuilder(); int ch; while ((ch = in.read()) != -1) input.append((char) ch); // search for all occurrences of pattern String patternString = "<a\\s+href\\s*=\\s*(\"[^\"]*\"|[^\\s>]*)\\s*>"; Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(input); while (matcher.find()) { int start = matcher.start(); int end = matcher.end(); String match = input.substring(start, end); System.out.println(match); } } catch (IOException e) { e.printStackTrace(); } catch (PatternSyntaxException e) { e.printStackTrace(); } } }