// HTML Parse Demo

import java.io.*;
import java.net.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;

/**
 * Command-line demo that parses an HTML document with the Swing HTML parser
 * and prints an indented listing of its structure to standard output.
 */
public class HtmlParseDemo {

    /**
     * Parses the document named by the first argument and lists its structure.
     *
     * <p>With no arguments a usage message is written to standard error and
     * the JVM exits with a non-zero status. Any failure while opening or
     * parsing the document is reported on standard error with a stack trace.
     *
     * @param args {@code args[0]} is either a URL (anything containing
     *             {@code "://"} after the first character) or a file name
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("Usage: java HtmlParseDemo [url | file]");
            // A missing argument is a usage error, so signal failure to the caller.
            System.exit(1);
        }

        String spec = args[0];
        try {
            Reader r = openReader(spec);
            try {
                System.out.println("About to parse " + spec);
                HTMLEditorKit.Parser parser = new ParserDelegator();
                // Third argument is ignoreCharSet: true means charset
                // declarations inside the document are not acted upon.
                parser.parse(r, new HTMLParseLister(), true);
            } finally {
                // Close the reader even when parsing fails part-way through.
                r.close();
            }
        } catch (Exception e) {
            System.err.println("Error: " + e);
            e.printStackTrace(System.err);
        }
    }

    /**
     * Opens a character stream for the given URL or file name.
     *
     * @param spec a URL if it contains {@code "://"} after the first
     *             character, otherwise a file name
     * @return a reader over the document's content
     * @throws Exception if the URL or file cannot be opened, or if the URL's
     *                   content is neither an {@code InputStream} nor a
     *                   {@code Reader}
     */
    private static Reader openReader(String spec) throws Exception {
        if (spec.indexOf("://") > 0) {
            URL u = new URL(spec);
            Object content = u.getContent();
            if (content instanceof InputStream) {
                return new InputStreamReader((InputStream) content);
            }
            if (content instanceof Reader) {
                return (Reader) content;
            }
            throw new Exception("Bad URL content type.");
        }
        return new FileReader(spec);
    }
}

/**
 * HTML parsing proceeds by calling a callback for each piece of the HTML
 * document. This callback prints an indented structural listing of the
 * HTML data to standard output: nesting depth grows with every start tag
 * and shrinks with every end tag.
 */
class HTMLParseLister extends HTMLEditorKit.ParserCallback {

    /** Number of spaces added per nesting level. */
    private static final int INDENT_STEP = 3;

    /** Current indentation, in spaces; never negative. */
    int indentSize = 0;

    /** Increases the indentation by one nesting level. */
    protected void indent() {
        indentSize += INDENT_STEP;
    }

    /** Decreases the indentation by one nesting level, clamping at zero. */
    protected void unIndent() {
        indentSize -= INDENT_STEP;
        if (indentSize < 0) {
            // Unbalanced end tags must not push the indentation negative.
            indentSize = 0;
        }
    }

    /** Prints the current indentation (spaces only, no line break). */
    protected void pIndent() {
        for (int i = 0; i < indentSize; i++) {
            System.out.print(" ");
        }
    }

    /**
     * Prints the length of a run of text at the current indentation.
     *
     * @param data the text characters
     * @param pos  position reported by the parser (not printed)
     */
    public void handleText(char[] data, int pos) {
        pIndent();
        System.out.println("Text(" + data.length + " chars)");
    }

    /**
     * Prints the length of a comment at the current indentation.
     *
     * @param data the comment characters
     * @param pos  position reported by the parser (not printed)
     */
    public void handleComment(char[] data, int pos) {
        pIndent();
        System.out.println("Comment(" + data.length + " chars)");
    }

    /**
     * Prints a start tag with its attribute count, then indents so that the
     * tag's content is listed one level deeper.
     *
     * @param t   the tag being opened
     * @param a   the tag's attributes
     * @param pos position reported by the parser (not printed)
     */
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag start(<" + t.toString() + ">, "
                + a.getAttributeCount() + " attrs)");
        indent();
    }

    /**
     * Un-indents first, so the end tag lines up with its start tag, then
     * prints the end tag.
     *
     * @param t   the tag being closed
     * @param pos position reported by the parser (not printed)
     */
    public void handleEndTag(HTML.Tag t, int pos) {
        unIndent();
        pIndent();
        System.out.println("Tag end(</" + t.toString() + ">)");
    }

    /**
     * Prints a tag that has no separate end tag, with its attribute count.
     * The indentation is left unchanged.
     *
     * @param t   the tag
     * @param a   the tag's attributes
     * @param pos position reported by the parser (not printed)
     */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag(<" + t.toString() + ">, "
                + a.getAttributeCount() + " attrs)");
    }

    /**
     * Prints a parser error message and its position, without indentation.
     *
     * @param errorMsg the message supplied by the parser
     * @param pos      position at which the error was reported
     */
    public void handleError(String errorMsg, int pos) {
        System.out.println("Parsing error: " + errorMsg + " at " + pos);
    }
}