第一百八十五节 Java XML教程

Java XML教程 - Java DOM简介

DOM是标准的树结构，其中每个节点包含来自XML结构的一个组件。

XML文档中两种最常见的节点类型是元素节点和文本节点。

使用Java DOM API，我们可以创建节点，删除节点，更改其内容，并遍历节点层次结构。

何时使用DOM

文档对象模型标准是为XML文档操作而设计的。

DOM的用意是语言无关的。Java的DOM解析器没有利用Java的面向对象的特性优势。

混合内容模型

文本和元素在DOM层次结构中混合。这种结构在DOM模型中称为混合内容。

例如，我们有以下xml结构:

<yourTag>This is an <bold>important</bold> test.</yourTag>

DOM节点的层级如下，其中每行代表一个节点:

ELEMENT: yourTag
   + TEXT: This is an
   + ELEMENT: bold
     + TEXT: important
   + TEXT: test.

yourTag 元素包含文本，后跟一个子元素，后跟另外的文本。

节点类型

为了支持混合内容，DOM节点非常简单。标签元素的“内容"标识它是的节点的类型。

例如，<yourTag> 节点内容是元素 yourTag的名称。

DOM节点API定义 nodeValue()， nodeType()和 nodeName()方法。

对于元素节点< yourTag> nodeName()返回yourTag，而nodeValue()返回null。

对于文本节点 + TEXT:这是一个nodeName()返回#text，nodeValue()返回“This is an"。

例子

以下代码显示了如何使用DOM解析器来解析xml文件并获取一个 org.w3c.dom.Document 对象。

import java.io.File;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;

public class Main {

  public static void main(String[] args) throws Exception {
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    DocumentBuilder db = null;
    db = dbf.newDocumentBuilder();
    Document doc = db.parse(new File("games.xml"));
  }
}

例2

以下代码显示如何执行DOM转储。

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

public class Main{
  static public void main(String[] arg) throws Exception{
    String filename = "input.xml";
    boolean validate = true;
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(validate);
    dbf.setNamespaceAware(true);
    dbf.setIgnoringElementContentWhitespace(true);

    DocumentBuilder builder = dbf.newDocumentBuilder();
    builder.setErrorHandler(new MyErrorHandler());
    InputSource is = new InputSource(filename);
    Document doc = builder.parse(is);
    TreeDumper td = new TreeDumper();
    td.dump(doc);
  }
}
class TreeDumper {
    public void dump(Document doc) {
        dumpLoop((Node)doc,"");
    }
    private void dumpLoop(Node node,String indent) {
        switch(node.getNodeType()) {
        case Node.CDATA_SECTION_NODE:
            System.out.println(indent + "CDATA_SECTION_NODE");
            break;
        case Node.COMMENT_NODE:
            System.out.println(indent + "COMMENT_NODE");
            break;
        case Node.DOCUMENT_FRAGMENT_NODE:
            System.out.println(indent + "DOCUMENT_FRAGMENT_NODE");
            break;
        case Node.DOCUMENT_NODE:
            System.out.println(indent + "DOCUMENT_NODE");
            break;
        case Node.DOCUMENT_TYPE_NODE:
            System.out.println(indent + "DOCUMENT_TYPE_NODE");
            break;
        case Node.ELEMENT_NODE:
            System.out.println(indent + "ELEMENT_NODE");
            break;
        case Node.ENTITY_NODE:
            System.out.println(indent + "ENTITY_NODE");
            break;
        case Node.ENTITY_REFERENCE_NODE:
            System.out.println(indent + "ENTITY_REFERENCE_NODE");
            break;
        case Node.NOTATION_NODE:
            System.out.println(indent + "NOTATION_NODE");
            break;
        case Node.PROCESSING_INSTRUCTION_NODE:
            System.out.println(indent + "PROCESSING_INSTRUCTION_NODE");
            break;
        case Node.TEXT_NODE:
            System.out.println(indent + "TEXT_NODE");
            break;
        default:
            System.out.println(indent + "Unknown node");
            break;
        }

        NodeList list = node.getChildNodes();
        for(int i=0; i<list.getLength(); i++)
            dumpLoop(list.item(i),indent + "   ");

    }
}
class MyErrorHandler implements ErrorHandler {
  public void warning(SAXParseException e) throws SAXException {
    show("Warning", e);
    throw (e);
  }

  public void error(SAXParseException e) throws SAXException {
    show("Error", e);
    throw (e);
  }

  public void fatalError(SAXParseException e) throws SAXException {
    show("Fatal Error", e);
    throw (e);
  }

  private void show(String type, SAXParseException e) {
    System.out.println(type + ": " + e.getMessage());
    System.out.println("Line " + e.getLineNumber() + " Column "
        + e.getColumnNumber());
    System.out.println("System ID: " + e.getSystemId());
  }
}

错误处理程序

以下代码显示了如何在使用DOM解析器解析XML时处理错误。

import java.io.IOException; 

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

public class DOMCheck {
  static public void main(String[] arg) {
    boolean validate = true;

    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(validate);
    dbf.setNamespaceAware(true);

    try {
      DocumentBuilder builder = dbf.newDocumentBuilder();
      builder.setErrorHandler(new MyErrorHandler());
      InputSource is = new InputSource("person.xml");
      Document doc = builder.parse(is);
    } catch (SAXException e) {
      System.out.println(e);
    } catch (ParserConfigurationException e) {
      System.err.println(e);
    } catch (IOException e) {
      System.err.println(e);
    }
  }
}

class MyErrorHandler implements ErrorHandler {
  public void warning(SAXParseException e) throws SAXException {
    show("Warning", e);
    throw (e);
  }

  public void error(SAXParseException e) throws SAXException {
    show("Error", e);
    throw (e);
  }

  public void fatalError(SAXParseException e) throws SAXException {
    show("Fatal Error", e);
    throw (e);
  }

  private void show(String type, SAXParseException e) {
    System.out.println(type + ": " + e.getMessage());
    System.out.println("Line " + e.getLineNumber() + " Column " + e.getColumnNumber());
    System.out.println("System ID: " + e.getSystemId());
  }
}

例3

以下代码显示了如何递归访问DOM树中的所有节点。

import java.io.File;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

public class Main {
  public static void main(String[] argv) throws Exception{
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);

    factory.setExpandEntityReferences(false);

    Document doc = factory.newDocumentBuilder().parse(new File("file.xml"));

    visit(doc, 0);
  }
  public static void visit(Node node, int level) {
    NodeList list = node.getChildNodes();
    for (int i = 0; i < list.getLength(); i++) {
      Node childNode = list.item(i);
      visit(childNode, level + 1);
    }
  }
}

例4

下面的代码显示了如何将XML片段转换为DOM片段。

import java.io.File;
import java.io.StringReader;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

public class Main {
  public static void main(String[] argv) throws Exception {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);

    Document doc = factory.newDocumentBuilder().parse(new File("infilename.xml"));

    String fragment = "<fragment>aaa</fragment>";

    factory = DocumentBuilderFactory.newInstance();
    Document d = factory.newDocumentBuilder().parse(new InputSource(new StringReader(fragment)));

    Node node = doc.importNode(d.getDocumentElement(), true);

    DocumentFragment docfrag = doc.createDocumentFragment();

    while (node.hasChildNodes()) {
      docfrag.appendChild(node.removeChild(node.getFirstChild()));
    }

    Element element = doc.getDocumentElement();
    element.appendChild(docfrag);
  }

}

例5

下面的代码显示了如何解析XML字符串:使用DOM和StringReader。

import java.io.StringReader;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

public class Main {
  public static void main(String arg[]) throws Exception{
    String xmlRecords = "<data><employee><name>A</name>"
        + "<title>Manager</title></employee></data>";

    DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    InputSource is = new InputSource();
    is.setCharacterStream(new StringReader(xmlRecords));

    Document doc = db.parse(is);
    NodeList nodes = doc.getElementsByTagName("employee");

    for (int i = 0; i < nodes.getLength(); i++) {
      Element element = (Element) nodes.item(i);

      NodeList name = element.getElementsByTagName("name");
      Element line = (Element) name.item(0);
      System.out.println("Name: " + getCharacterDataFromElement(line));

      NodeList title = element.getElementsByTagName("title");
      line = (Element) title.item(0);
      System.out.println("Title: " + getCharacterDataFromElement(line));
    }

  }

  public static String getCharacterDataFromElement(Element e) {
    Node child = e.getFirstChild();
    if (child instanceof CharacterData) {
      CharacterData cd = (CharacterData) child;
      return cd.getData();
    }
    return "";
  }
}

上面的代码生成以下结果。