// Java tutorial
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Enumeration;

import javax.swing.text.AttributeSet;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;

/**
 * Downloads one HTML page and saves a copy on disk under a directory named
 * after the host, rewriting link-like attributes to absolute URLs on the way.
 */
public class MainClass {

  /** Page that {@link #main(String[])} downloads. */
  private static final String START_URL = "http://www.java2s.com";

  /** File name used when the URL path does not name a file. */
  private static final String DEFAULT_FILE_NAME = "index.html";

  /**
   * Fetches {@code START_URL}, parses it with the Swing HTML parser and writes
   * the re-serialized page to the file chosen by {@link #localFileFor(URL)}.
   *
   * @param args ignored
   * @throws Exception if the URL cannot be opened, the target directory cannot
   *     be created, or reading/parsing/writing fails
   */
  public static void main(String[] args) throws Exception {
    URL u = new URL(START_URL);
    File output = localFileFor(u);
    File localDirectory = output.getParentFile();

    // mkdirs() returns false when the directory already exists, so test for
    // existence first; otherwise every run after the first would save nothing.
    if (!localDirectory.isDirectory() && !localDirectory.mkdirs()) {
      throw new IOException("Could not create directory " + localDirectory);
    }

    HTMLEditorKit.Parser parser = new ParserGetter().getParser();

    // try-with-resources closes the network stream and the output file even
    // when parsing fails. Both sides use the platform default charset, as the
    // original code did.
    try (InputStream in = u.openStream();
        InputStreamReader r = new InputStreamReader(in);
        FileWriter out = new FileWriter(output)) {
      HTMLEditorKit.ParserCallback callback = new PageSaver(out, u);
      parser.parse(r, callback, false);
    }
  }

  /**
   * Maps a URL to the local file its content is saved to:
   * {@code <host>/<path segments...>/<file name>}.
   *
   * <p>Only the path component is used (the query string is ignored). A path
   * that is empty or ends in {@code /} is saved as {@code index.html}. Empty,
   * {@code .} and {@code ..} segments are skipped so the result always stays
   * below the host directory.
   *
   * @param u the URL being saved
   * @return the (relative) local file for {@code u}
   */
  static File localFileFor(URL u) {
    File target = new File(u.getHost());
    // Limit -1 keeps a trailing empty segment, which marks a directory URL.
    String[] segments = u.getPath().split("/", -1);
    int last = segments.length - 1;
    for (int i = 0; i < last; i++) {
      if (isPlainSegment(segments[i])) {
        target = new File(target, segments[i]);
      }
    }
    String fileName = isPlainSegment(segments[last]) ? segments[last] : DEFAULT_FILE_NAME;
    return new File(target, fileName);
  }

  /** Returns true for a path segment that names an ordinary file or directory. */
  private static boolean isPlainSegment(String segment) {
    return !segment.isEmpty() && !segment.equals(".") && !segment.equals("..");
  }
}

/**
 * Parser callback that writes each parse event back out as HTML text.
 *
 * <p>Values of {@code href}, {@code src}, {@code lowsrc} and {@code codebase}
 * are resolved against the base URL, and an {@code <applet>} without a
 * {@code codebase} gets one derived from the base URL. I/O errors are reported
 * on {@code System.err} because the callback methods cannot throw them.
 */
class PageSaver extends HTMLEditorKit.ParserCallback {

  private final Writer out;
  private final URL base;

  /**
   * @param out destination for the generated markup; flushed after every event
   * @param base URL of the page being saved, used to resolve relative links
   */
  public PageSaver(Writer out, URL base) {
    this.out = out;
    this.base = base;
  }

  /** Writes an opening tag with its attributes, adding an applet codebase if missing. */
  @Override
  public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    try {
      out.write("<" + tag);
      this.writeAttributes(attributes);
      if (tag == HTML.Tag.APPLET
          && attributes.getAttribute(HTML.Attribute.CODEBASE) == null) {
        String codebase = base.toString();
        // A base that names a page is cut back to its directory.
        if (codebase.endsWith(".htm") || codebase.endsWith(".html")) {
          codebase = codebase.substring(0, codebase.lastIndexOf('/'));
        }
        out.write(" codebase=\"" + escapeAttribute(codebase) + "\"");
      }
      out.write(">");
      out.flush();
    } catch (IOException ex) {
      System.err.println(ex);
    }
  }

  /** Writes a closing tag. */
  @Override
  public void handleEndTag(HTML.Tag tag, int position) {
    try {
      out.write("</" + tag + ">");
      out.flush();
    } catch (IOException ex) {
      System.err.println(ex);
    }
  }

  /**
   * Writes every attribute in {@code attributes} as {@code  name="value"}.
   *
   * @throws IOException if the underlying writer fails
   */
  private void writeAttributes(AttributeSet attributes) throws IOException {
    Enumeration<?> names = attributes.getAttributeNames();
    while (names.hasMoreElements()) {
      Object name = names.nextElement();
      // IMPLIED is a parser marker (value Boolean.TRUE), not a real HTML
      // attribute; casting its value to String used to throw.
      if (name == HTMLEditorKit.ParserCallback.IMPLIED) {
        continue;
      }
      // String.valueOf instead of a cast: values are not guaranteed Strings.
      String value = String.valueOf(attributes.getAttribute(name));
      if (isUrlAttribute(name)) {
        value = resolveAgainstBase(value);
      }
      out.write(" " + name + "=\"" + escapeAttribute(value) + "\"");
    }
  }

  /** Returns true for the attributes whose value is rewritten to an absolute URL. */
  private static boolean isUrlAttribute(Object name) {
    return name == HTML.Attribute.HREF
        || name == HTML.Attribute.SRC
        || name == HTML.Attribute.LOWSRC
        || name == HTML.Attribute.CODEBASE;
  }

  /**
   * Resolves {@code value} against the base URL. A value that java.net.URL
   * cannot parse (for example a {@code javascript:} link) is returned
   * unchanged instead of being dropped from the output.
   */
  private String resolveAgainstBase(String value) {
    try {
      return new URL(base, value).toString();
    } catch (MalformedURLException ex) {
      return value;
    }
  }

  /** Escapes the characters that would break a double-quoted attribute value. */
  private static String escapeAttribute(String value) {
    // '&' first, so the '&' of a generated "&quot;" is not escaped again.
    return value.replace("&", "&amp;").replace("\"", "&quot;");
  }

  /** Writes a comment as {@code <!-- text -->}. */
  @Override
  public void handleComment(char[] text, int position) {
    try {
      out.write("<!-- ");
      out.write(text);
      out.write(" -->");
      out.flush();
    } catch (IOException ex) {
      System.err.println(ex);
    }
  }

  /** Writes character data exactly as delivered by the parser. */
  @Override
  public void handleText(char[] text, int position) {
    try {
      out.write(text);
      out.flush();
    } catch (IOException ex) {
      System.err.println(ex);
    }
  }

  /** Writes a tag that has no closing tag, with its attributes. */
  @Override
  public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    try {
      out.write("<" + tag);
      this.writeAttributes(attributes);
      out.write(">");
      // Flush like every other callback so output is not held back.
      out.flush();
    } catch (IOException ex) {
      System.err.println(ex);
    }
  }
}

/**
 * Exposes {@link HTMLEditorKit#getParser()}, which is protected in the
 * superclass, so code outside the kit can obtain the parser.
 */
class ParserGetter extends HTMLEditorKit {

  /** Returns the parser supplied by {@link HTMLEditorKit}. */
  @Override
  public HTMLEditorKit.Parser getParser() {
    return super.getParser();
  }
}