// Display all URLs in a web page by matching a regular expression that describes the <a href=...> HTML tag
/*
This program is a part of the companion code for Core Java 8th ed.
(http://horstmann.com/corejava)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * This program displays all URLs in a web page by matching a regular expression that describes
 * the {@code <a href=...>} HTML tag. Start the program as <br>
 * {@code java HrefMatch URL}
 * <p>
 * If no URL is given on the command line, {@code http://java.sun.com} is used. Each matching
 * opening tag is printed on its own line to standard output.
 * @version 1.01 2004-06-04
 * @author Cay Horstmann
 */
public class HrefMatch
{
   /**
    * Fetches the page, then prints every {@code <a href=...>} opening tag found in it.
    * I/O and pattern-syntax failures are reported by printing the stack trace.
    * @param args optional; {@code args[0]} is the URL to scan
    */
   public static void main(String[] args)
   {
      try
      {
         // get URL string from command line or use default
         String urlString = args.length > 0 ? args[0] : "http://java.sun.com";

         // read the complete page into memory before matching
         String input = readPage(urlString);

         // search for all occurrences of pattern: the href value is either a
         // double-quoted string or an unquoted run without whitespace or '>'
         String patternString = "<a\\s+href\\s*=\\s*(\"[^\"]*\"|[^\\s>]*)\\s*>";
         Pattern pattern = Pattern.compile(patternString, Pattern.CASE_INSENSITIVE);
         Matcher matcher = pattern.matcher(input);
         while (matcher.find())
         {
            // group() is the text of the whole match, i.e. the complete opening tag
            System.out.println(matcher.group());
         }
      }
      catch (IOException e)
      {
         e.printStackTrace();
      }
      catch (PatternSyntaxException e)
      {
         e.printStackTrace();
      }
   }

   /**
    * Reads the entire content of a URL as text and closes the stream afterwards.
    * @param urlString the URL to read
    * @return the decoded page content
    * @throws IOException if the URL is malformed or the content cannot be read
    */
   private static String readPage(String urlString) throws IOException
   {
      // NOTE(review): bytes are decoded with the platform default charset, as before;
      // pages served in another encoding may show garbled non-ASCII text - confirm intent.
      InputStreamReader in = new InputStreamReader(new URL(urlString).openStream());
      try
      {
         StringBuilder contents = new StringBuilder();
         char[] buffer = new char[4096];
         int count;
         // bulk reads instead of one read() call per character
         while ((count = in.read(buffer)) != -1)
            contents.append(buffer, 0, count);
         return contents.toString();
      }
      finally
      {
         // always release the underlying connection, even when reading fails
         in.close();
      }
   }
}
// Related examples in the same category