101 lines
3.1 KiB
Java
101 lines
3.1 KiB
Java
|
HTML Parse Demo
|
||
|
|
||
|
import java.io.*;
|
||
|
import java.net.*;
|
||
|
import javax.swing.text.*;
|
||
|
import javax.swing.text.html.*;
|
||
|
import javax.swing.text.html.parser.*;
|
||
|
|
||
|
public class HtmlParseDemo {
|
||
|
public static void main(String [] args) {
|
||
|
Reader r;
|
||
|
if (args.length == 0) {
|
||
|
System.err.println("Usage: java HTMLParseDemo [url | file]");
|
||
|
System.exit(0);
|
||
|
}
|
||
|
String spec = args[0];
|
||
|
try {
|
||
|
if (spec.indexOf("://") > 0) {
|
||
|
URL u = new URL(spec);
|
||
|
Object content = u.getContent();
|
||
|
if (content instanceof InputStream) {
|
||
|
r = new InputStreamReader((InputStream)content);
|
||
|
}
|
||
|
else if (content instanceof Reader) {
|
||
|
r = (Reader)content;
|
||
|
}
|
||
|
else {
|
||
|
throw new Exception("Bad URL content type.");
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
r = new FileReader(spec);
|
||
|
}
|
||
|
|
||
|
HTMLEditorKit.Parser parser;
|
||
|
System.out.println("About to parse " + spec);
|
||
|
parser = new ParserDelegator();
|
||
|
parser.parse(r, new HTMLParseLister(), true);
|
||
|
r.close();
|
||
|
}
|
||
|
catch (Exception e) {
|
||
|
System.err.println("Error: " + e);
|
||
|
e.printStackTrace(System.err);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
 * HTML parsing proceeds by calling a callback for each and every piece of
 * the HTML document. This simple callback class prints an indented
 * structural listing of the HTML data to {@code System.out}.
 *
 * <p>A start tag increases the indentation for everything that follows it;
 * the matching end tag decreases it again. The indentation never drops
 * below zero, even if more end tags than start tags are reported.
 */
class HTMLParseLister extends HTMLEditorKit.ParserCallback
{
    /** Number of spaces added or removed per nesting level. */
    private static final int INDENT_STEP = 3;

    /** Current indentation, in spaces. */
    int indentSize = 0;

    /** Increases the indentation by one nesting level. */
    protected void indent() {
        indentSize += INDENT_STEP;
    }

    /** Decreases the indentation by one nesting level, clamping at zero. */
    protected void unIndent() {
        indentSize -= INDENT_STEP;
        if (indentSize < 0) {
            indentSize = 0;
        }
    }

    /** Prints the current indentation (no trailing newline). */
    protected void pIndent() {
        for (int i = 0; i < indentSize; i++) {
            System.out.print(" ");
        }
    }

    /**
     * Reports a run of text by printing its length.
     *
     * @param data the characters of the text run
     * @param pos  position reported by the parser (unused)
     */
    public void handleText(char[] data, int pos) {
        pIndent();
        System.out.println("Text(" + data.length + " chars)");
    }

    /**
     * Reports a comment by printing its length.
     *
     * @param data the characters of the comment
     * @param pos  position reported by the parser (unused)
     */
    public void handleComment(char[] data, int pos) {
        pIndent();
        System.out.println("Comment(" + data.length + " chars)");
    }

    /**
     * Reports a start tag and indents everything that follows it.
     *
     * @param t   the tag
     * @param a   the tag's attributes; only their count is printed
     * @param pos position reported by the parser (unused)
     */
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag start(<" + t.toString() + ">, " +
                           a.getAttributeCount() + " attrs)");
        indent();
    }

    /**
     * Reports an end tag, printed at the same indentation as its start tag.
     *
     * @param t   the tag
     * @param pos position reported by the parser (unused)
     */
    public void handleEndTag(HTML.Tag t, int pos) {
        // Un-indent first so the end tag lines up with the matching start tag.
        unIndent();
        pIndent();
        System.out.println("Tag end(</" + t.toString() + ">)");
    }

    /**
     * Reports a tag that has no separate end tag; indentation is unchanged.
     *
     * @param t   the tag
     * @param a   the tag's attributes; only their count is printed
     * @param pos position reported by the parser (unused)
     */
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        System.out.println("Tag(<" + t.toString() + ">, " +
                           a.getAttributeCount() + " attrs)");
    }

    /**
     * Reports a parser error together with its position (not indented).
     *
     * @param errorMsg the parser's error message
     * @param pos      position at which the error was reported
     */
    public void handleError(String errorMsg, int pos){
        System.out.println("Parsing error: " + errorMsg + " at " + pos);
    }
}
|