Java XML教程 - Java DOM简介
DOM是标准的树结构,其中每个节点包含来自XML结构的一个组件。
XML文档中两种最常见的节点类型是元素节点和文本节点。
使用Java DOM API,我们可以创建节点,删除节点,更改其内容,并遍历节点层次结构。
何时使用DOM
文档对象模型标准是为XML文档操作而设计的。
DOM的用意是语言无关的。Java的DOM解析器没有利用Java的面向对象的特性优势。
混合内容模型
文本和元素在DOM层次结构中混合。这种结构在DOM模型中称为混合内容。
例如,我们有以下xml结构:
<yourTag>This is an <bold>important</bold> test.</yourTag>
DOM节点的层级如下,其中每行代表一个节点:
ELEMENT: yourTag + TEXT: This is an + ELEMENT: bold + TEXT: important + TEXT: test.
yourTag
元素包含文本,后跟一个子元素,后跟另外的文本。
节点类型
为了支持混合内容,DOM节点非常简单。标签元素的“内容"标识它是的节点的类型。
例如,<yourTag> 节点内容是元素 yourTag
的名称。
DOM节点API定义 nodeValue()
, nodeType()
和 nodeName()
方法。
对于元素节点< yourTag>
nodeName()返回yourTag,而nodeValue()返回null。
对于文本节点 + TEXT:这是一个
nodeName()返回#text,nodeValue()返回“This is an"。
例子
以下代码显示了如何使用DOM解析器来解析xml文件并获取一个 org.w3c.dom.Document
对象。
import java.io.File; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; public class Main { public static void main(String[] args) throws Exception { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = null; db = dbf.newDocumentBuilder(); Document doc = db.parse(new File("games.xml")); } }
例2
以下代码显示如何执行DOM转储。
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; public class Main{ static public void main(String[] arg) throws Exception{ String filename = "input.xml"; boolean validate = true; DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(validate); dbf.setNamespaceAware(true); dbf.setIgnoringElementContentWhitespace(true); DocumentBuilder builder = dbf.newDocumentBuilder(); builder.setErrorHandler(new MyErrorHandler()); InputSource is = new InputSource(filename); Document doc = builder.parse(is); TreeDumper td = new TreeDumper(); td.dump(doc); } } class TreeDumper { public void dump(Document doc) { dumpLoop((Node)doc,""); } private void dumpLoop(Node node,String indent) { switch(node.getNodeType()) { case Node.CDATA_SECTION_NODE: System.out.println(indent + "CDATA_SECTION_NODE"); break; case Node.COMMENT_NODE: System.out.println(indent + "COMMENT_NODE"); break; case Node.DOCUMENT_FRAGMENT_NODE: System.out.println(indent + "DOCUMENT_FRAGMENT_NODE"); break; case Node.DOCUMENT_NODE: System.out.println(indent + "DOCUMENT_NODE"); break; case Node.DOCUMENT_TYPE_NODE: System.out.println(indent + "DOCUMENT_TYPE_NODE"); break; case Node.ELEMENT_NODE: System.out.println(indent + "ELEMENT_NODE"); break; case Node.ENTITY_NODE: System.out.println(indent + "ENTITY_NODE"); break; case Node.ENTITY_REFERENCE_NODE: System.out.println(indent + "ENTITY_REFERENCE_NODE"); break; case Node.NOTATION_NODE: System.out.println(indent + "NOTATION_NODE"); break; case Node.PROCESSING_INSTRUCTION_NODE: System.out.println(indent + "PROCESSING_INSTRUCTION_NODE"); break; case Node.TEXT_NODE: System.out.println(indent + "TEXT_NODE"); break; default: System.out.println(indent + "Unknown node"); break; } NodeList list = node.getChildNodes(); for(int i=0; i<list.getLength(); i++) dumpLoop(list.item(i),indent + " "); } } class MyErrorHandler implements ErrorHandler { public void warning(SAXParseException e) throws SAXException { show("Warning", e); throw (e); } public void error(SAXParseException e) throws SAXException { show("Error", e); throw (e); } public void fatalError(SAXParseException e) throws SAXException { show("Fatal Error", e); throw (e); } private void show(String type, SAXParseException e) { System.out.println(type + ": " + e.getMessage()); System.out.println("Line " + e.getLineNumber() + " Column " + e.getColumnNumber()); System.out.println("System ID: " + e.getSystemId()); } }
错误处理程序
以下代码显示了如何在使用DOM解析器解析XML时处理错误。
import java.io.IOException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Document; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; public class DOMCheck { static public void main(String[] arg) { boolean validate = true; DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(validate); dbf.setNamespaceAware(true); try { DocumentBuilder builder = dbf.newDocumentBuilder(); builder.setErrorHandler(new MyErrorHandler()); InputSource is = new InputSource("person.xml"); Document doc = builder.parse(is); } catch (SAXException e) { System.out.println(e); } catch (ParserConfigurationException e) { System.err.println(e); } catch (IOException e) { System.err.println(e); } } } class MyErrorHandler implements ErrorHandler { public void warning(SAXParseException e) throws SAXException { show("Warning", e); throw (e); } public void error(SAXParseException e) throws SAXException { show("Error", e); throw (e); } public void fatalError(SAXParseException e) throws SAXException { show("Fatal Error", e); throw (e); } private void show(String type, SAXParseException e) { System.out.println(type + ": " + e.getMessage()); System.out.println("Line " + e.getLineNumber() + " Column " + e.getColumnNumber()); System.out.println("System ID: " + e.getSystemId()); } }
例3
以下代码显示了如何递归访问DOM树中的所有节点。
import java.io.File; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; public class Main { public static void main(String[] argv) throws Exception{ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(true); factory.setExpandEntityReferences(false); Document doc = factory.newDocumentBuilder().parse(new File("file.xml")); visit(doc, 0); } public static void visit(Node node, int level) { NodeList list = node.getChildNodes(); for (int i = 0; i < list.getLength(); i++) { Node childNode = list.item(i); visit(childNode, level + 1); } } }
例4
下面的代码显示了如何将XML片段转换为DOM片段。
import java.io.File; import java.io.StringReader; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.Document; import org.w3c.dom.DocumentFragment; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.InputSource; public class Main { public static void main(String[] argv) throws Exception { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(true); Document doc = factory.newDocumentBuilder().parse(new File("infilename.xml")); String fragment = "<fragment>aaa</fragment>"; factory = DocumentBuilderFactory.newInstance(); Document d = factory.newDocumentBuilder().parse(new InputSource(new StringReader(fragment))); Node node = doc.importNode(d.getDocumentElement(), true); DocumentFragment docfrag = doc.createDocumentFragment(); while (node.hasChildNodes()) { docfrag.appendChild(node.removeChild(node.getFirstChild())); } Element element = doc.getDocumentElement(); element.appendChild(docfrag); } }
例5
下面的代码显示了如何解析XML字符串:使用DOM和StringReader。
import java.io.StringReader; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.w3c.dom.CharacterData; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; public class Main { public static void main(String arg[]) throws Exception{ String xmlRecords = "<data><employee><name>A</name>" + "<title>Manager</title></employee></data>"; DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); InputSource is = new InputSource(); is.setCharacterStream(new StringReader(xmlRecords)); Document doc = db.parse(is); NodeList nodes = doc.getElementsByTagName("employee"); for (int i = 0; i < nodes.getLength(); i++) { Element element = (Element) nodes.item(i); NodeList name = element.getElementsByTagName("name"); Element line = (Element) name.item(0); System.out.println("Name: " + getCharacterDataFromElement(line)); NodeList title = element.getElementsByTagName("title"); line = (Element) title.item(0); System.out.println("Title: " + getCharacterDataFromElement(line)); } } public static String getCharacterDataFromElement(Element e) { Node child = e.getFirstChild(); if (child instanceof CharacterData) { CharacterData cd = (CharacterData) child; return cd.getData(); } return ""; } }
上面的代码生成以下结果。