001 /* $Id: NodeCreateRule.java 992060 2010-09-02 19:09:47Z simonetripodi $
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one or more
004 * contributor license agreements. See the NOTICE file distributed with
005 * this work for additional information regarding copyright ownership.
006 * The ASF licenses this file to You under the Apache License, Version 2.0
007 * (the "License"); you may not use this file except in compliance with
008 * the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019
020 package org.apache.commons.digester;
021
022
023 import javax.xml.parsers.DocumentBuilder;
024 import javax.xml.parsers.DocumentBuilderFactory;
025 import javax.xml.parsers.ParserConfigurationException;
026
027 import org.w3c.dom.Attr;
028 import org.w3c.dom.DOMException;
029 import org.w3c.dom.Document;
030 import org.w3c.dom.Element;
031 import org.w3c.dom.Node;
032 import org.xml.sax.Attributes;
033 import org.xml.sax.ContentHandler;
034 import org.xml.sax.SAXException;
035 import org.xml.sax.helpers.DefaultHandler;
036
037
038 /**
039 * A rule implementation that creates a DOM
040 * {@link org.w3c.dom.Node Node} containing the XML at the element that matched
041 * the rule. Two concrete types of nodes can be created by this rule:
042 * <ul>
043 * <li>the default is to create an {@link org.w3c.dom.Element Element} node.
044 * The created element will correspond to the element that matched the rule,
045 * containing all XML content underneath that element.</li>
046 * <li>alternatively, this rule can create nodes of type
047 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, which will contain
048 * only the XML content under the element the rule was triggered on.</li>
049 * </ul>
050 * The created node will be normalized, meaning it will not contain text nodes
051 * that only contain white space characters.
052 *
053
054 *
055 * <p>The created <code>Node</code> will be pushed on Digester's object stack
056 * when done. To use it in the context of another DOM
057 * {@link org.w3c.dom.Document Document}, it must be imported first, using the
058 * Document method
059 * {@link org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) importNode()}.
060 * </p>
061 *
062 * <p><strong>Important Note:</strong> This is implemented by replacing the SAX
063 * {@link org.xml.sax.ContentHandler ContentHandler} in the parser used by
064 * Digester, and resetting it when the matched element is closed. As a side
065 * effect, rules that would match XML nodes under the element that matches
066 * a <code>NodeCreateRule</code> will never be triggered by Digester, which
067 * usually is the behavior one would expect.</p>
068 *
069 * <p><strong>Note</strong> that the current implementation does not set the namespace prefixes
070 * in the exported nodes. The (usually more important) namespace URIs are set,
071 * of course.</p>
072 *
073 * @since Digester 1.4
074 */
075
076 public class NodeCreateRule extends Rule {
077
078
079 // ---------------------------------------------------------- Inner Classes
080
081
082 /**
083 * The SAX content handler that does all the actual work of assembling the
084 * DOM node tree from the SAX events.
085 */
086 private class NodeBuilder
087 extends DefaultHandler {
088
089
090 // ------------------------------------------------------- Constructors
091
092
093 /**
094 * Constructor.
095 *
096 * <p>Stores the content handler currently used by Digester so it can
097 * be reset when done, and initializes the DOM objects needed to
098 * build the node.</p>
099 *
100 * @param doc the document to use to create nodes
101 * @param root the root node
102 * @throws ParserConfigurationException if the DocumentBuilderFactory
103 * could not be instantiated
104 * @throws SAXException if the XMLReader could not be instantiated by
105 * Digester (should not happen)
106 */
107 public NodeBuilder(Document doc, Node root)
108 throws ParserConfigurationException, SAXException {
109
110 this.doc = doc;
111 this.root = root;
112 this.top = root;
113
114 oldContentHandler = digester.getCustomContentHandler();
115
116 }
117
118
119 // ------------------------------------------------- Instance Variables
120
121
122 /**
123 * The content handler used by Digester before it was set to this
124 * content handler.
125 */
126 protected ContentHandler oldContentHandler = null;
127
128
129 /**
130 * Depth of the current node, relative to the element where the content
131 * handler was put into action.
132 */
133 protected int depth = 0;
134
135
136 /**
137 * A DOM Document used to create the various Node instances.
138 */
139 protected Document doc = null;
140
141
142 /**
143 * The DOM node that will be pushed on Digester's stack.
144 */
145 protected Node root = null;
146
147
148 /**
149 * The current top DOM mode.
150 */
151 protected Node top = null;
152
153 /**
154 * The text content of the current top DOM node.
155 */
156 protected StringBuffer topText = new StringBuffer();
157
158
159 // --------------------------------------------- Helper Methods
160
161 /**
162 * Appends a {@link org.w3c.dom.Text Text} node to the current node
163 * if the content reported by the parser is not purely whitespace.
164 */
165 private void addTextIfPresent() throws SAXException {
166 if (topText.length() > 0) {
167 String str = topText.toString();
168 topText.setLength(0);
169
170 if (str.trim().length() > 0) {
171 // The contained text is not *pure* whitespace, so create
172 // a text node to hold it. Note that the "untrimmed" text
173 // is stored in the node.
174 try {
175 top.appendChild(doc.createTextNode(str));
176 } catch (DOMException e) {
177 throw new SAXException(e.getMessage());
178 }
179 }
180 }
181 }
182
183 // --------------------------------------------- ContentHandler Methods
184
185
186 /**
187 * Handle notification about text embedded within the current node.
188 * <p>
189 * An xml parser calls this when text is found. We need to ensure that this
190 * text gets attached to the new Node we are creating - except in the case
191 * where the only text in the node is whitespace.
192 * <p>
193 * There is a catch, however. According to the sax specification, a parser
194 * does not need to pass all of the text content of a node in one go; it can
195 * make multiple calls passing part of the data on each call. In particular,
196 * when the body of an element includes xml entity-references, at least some
197 * parsers make a separate call to this method to pass just the entity content.
198 * <p>
199 * In this method, we therefore just append the provided text to a
200 * "current text" buffer. When the element end is found, or a child element
201 * is found then we can check whether we have all-whitespace. See method
202 * addTextIfPresent.
203 *
204 * @param ch the characters from the XML document
205 * @param start the start position in the array
206 * @param length the number of characters to read from the array
207 * @throws SAXException if the DOM implementation throws an exception
208 */
209 @Override
210 public void characters(char[] ch, int start, int length)
211 throws SAXException {
212
213 topText.append(ch, start, length);
214 }
215
216
217 /**
218 * Checks whether control needs to be returned to Digester.
219 *
220 * @param namespaceURI the namespace URI
221 * @param localName the local name
222 * @param qName the qualified (prefixed) name
223 * @throws SAXException if the DOM implementation throws an exception
224 */
225 @Override
226 public void endElement(String namespaceURI, String localName,
227 String qName)
228 throws SAXException {
229
230 addTextIfPresent();
231
232 try {
233 if (depth == 0) {
234 getDigester().setCustomContentHandler(oldContentHandler);
235 getDigester().push(root);
236 getDigester().endElement(namespaceURI, localName, qName);
237 }
238
239 top = top.getParentNode();
240 depth--;
241 } catch (DOMException e) {
242 throw new SAXException(e.getMessage());
243 }
244
245 }
246
247
248 /**
249 * Adds a new
250 * {@link org.w3c.dom.ProcessingInstruction ProcessingInstruction} to
251 * the current node.
252 *
253 * @param target the processing instruction target
254 * @param data the processing instruction data, or null if none was
255 * supplied
256 * @throws SAXException if the DOM implementation throws an exception
257 */
258 @Override
259 public void processingInstruction(String target, String data)
260 throws SAXException {
261
262 try {
263 top.appendChild(doc.createProcessingInstruction(target, data));
264 } catch (DOMException e) {
265 throw new SAXException(e.getMessage());
266 }
267
268 }
269
270
271 /**
272 * Adds a new child {@link org.w3c.dom.Element Element} to the current
273 * node.
274 *
275 * @param namespaceURI the namespace URI
276 * @param localName the local name
277 * @param qName the qualified (prefixed) name
278 * @param atts the list of attributes
279 * @throws SAXException if the DOM implementation throws an exception
280 */
281 @Override
282 public void startElement(String namespaceURI, String localName,
283 String qName, Attributes atts)
284 throws SAXException {
285
286 addTextIfPresent();
287
288 try {
289 Node previousTop = top;
290 if ((localName == null) || (localName.length() == 0)) {
291 top = doc.createElement(qName);
292 } else {
293 top = doc.createElementNS(namespaceURI, localName);
294 }
295 for (int i = 0; i < atts.getLength(); i++) {
296 Attr attr = null;
297 if ((atts.getLocalName(i) == null) ||
298 (atts.getLocalName(i).length() == 0)) {
299 attr = doc.createAttribute(atts.getQName(i));
300 attr.setNodeValue(atts.getValue(i));
301 ((Element)top).setAttributeNode(attr);
302 } else {
303 attr = doc.createAttributeNS(atts.getURI(i),
304 atts.getLocalName(i));
305 attr.setNodeValue(atts.getValue(i));
306 ((Element)top).setAttributeNodeNS(attr);
307 }
308 }
309 previousTop.appendChild(top);
310 depth++;
311 } catch (DOMException e) {
312 throw new SAXException(e.getMessage());
313 }
314
315 }
316
317 }
318
319
320 // ----------------------------------------------------------- Constructors
321
322
323 /**
324 * Default constructor. Creates an instance of this rule that will create a
325 * DOM {@link org.w3c.dom.Element Element}.
326 */
327 public NodeCreateRule() throws ParserConfigurationException {
328
329 this(Node.ELEMENT_NODE);
330
331 }
332
333
334 /**
335 * Constructor. Creates an instance of this rule that will create a DOM
336 * {@link org.w3c.dom.Element Element}, but lets you specify the JAXP
337 * <code>DocumentBuilder</code> that should be used when constructing the
338 * node tree.
339 *
340 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
341 */
342 public NodeCreateRule(DocumentBuilder documentBuilder) {
343
344 this(Node.ELEMENT_NODE, documentBuilder);
345
346 }
347
348
349 /**
350 * Constructor. Creates an instance of this rule that will create either a
351 * DOM {@link org.w3c.dom.Element Element} or a DOM
352 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
353 * value of the <code>nodeType</code> parameter.
354 *
355 * @param nodeType the type of node to create, which can be either
356 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
357 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
358 * @throws IllegalArgumentException if the node type is not supported
359 */
360 public NodeCreateRule(int nodeType) throws ParserConfigurationException {
361
362 this(nodeType,
363 DocumentBuilderFactory.newInstance().newDocumentBuilder());
364
365 }
366
367
368 /**
369 * Constructor. Creates an instance of this rule that will create either a
370 * DOM {@link org.w3c.dom.Element Element} or a DOM
371 * {@link org.w3c.dom.DocumentFragment DocumentFragment}, depending on the
372 * value of the <code>nodeType</code> parameter. This constructor lets you
373 * specify the JAXP <code>DocumentBuilder</code> that should be used when
374 * constructing the node tree.
375 *
376 * @param nodeType the type of node to create, which can be either
377 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} or
378 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
379 * @param documentBuilder the JAXP <code>DocumentBuilder</code> to use
380 * @throws IllegalArgumentException if the node type is not supported
381 */
382 public NodeCreateRule(int nodeType, DocumentBuilder documentBuilder) {
383
384 if (!((nodeType == Node.DOCUMENT_FRAGMENT_NODE) ||
385 (nodeType == Node.ELEMENT_NODE))) {
386 throw new IllegalArgumentException(
387 "Can only create nodes of type DocumentFragment and Element");
388 }
389 this.nodeType = nodeType;
390 this.documentBuilder = documentBuilder;
391
392 }
393
394
395 // ----------------------------------------------------- Instance Variables
396
397
398 /**
399 * The JAXP <code>DocumentBuilder</code> to use.
400 */
401 private DocumentBuilder documentBuilder = null;
402
403
404 /**
405 * The type of the node that should be created. Must be one of the
406 * constants defined in {@link org.w3c.dom.Node Node}, but currently only
407 * {@link org.w3c.dom.Node#ELEMENT_NODE Node.ELEMENT_NODE} and
408 * {@link org.w3c.dom.Node#DOCUMENT_FRAGMENT_NODE Node.DOCUMENT_FRAGMENT_NODE}
409 * are allowed values.
410 */
411 private int nodeType = Node.ELEMENT_NODE;
412
413
414 // ----------------------------------------------------------- Rule Methods
415
416
417 /**
418 * When this method fires, the digester is told to forward all SAX
419 * ContentHandler events to the builder object, resulting in a DOM being
420 * built instead of normal digester rule-handling occurring. When the
421 * end of the current xml element is encountered, the original content
422 * handler is restored (expected to be NULL, allowing normal Digester
423 * operations to continue).
424 *
425 * @param namespaceURI the namespace URI of the matching element, or an
426 * empty string if the parser is not namespace aware or the element has
427 * no namespace
428 * @param name the local name if the parser is namespace aware, or just
429 * the element name otherwise
430 * @param attributes The attribute list of this element
431 * @throws Exception indicates a JAXP configuration problem
432 */
433 @Override
434 public void begin(String namespaceURI, String name, Attributes attributes)
435 throws Exception {
436
437 Document doc = documentBuilder.newDocument();
438 NodeBuilder builder = null;
439 if (nodeType == Node.ELEMENT_NODE) {
440 Element element = null;
441 if (getDigester().getNamespaceAware()) {
442 element =
443 doc.createElementNS(namespaceURI, name);
444 for (int i = 0; i < attributes.getLength(); i++) {
445 element.setAttributeNS(attributes.getURI(i),
446 attributes.getQName(i),
447 attributes.getValue(i));
448 }
449 } else {
450 element = doc.createElement(name);
451 for (int i = 0; i < attributes.getLength(); i++) {
452 element.setAttribute(attributes.getQName(i),
453 attributes.getValue(i));
454 }
455 }
456 builder = new NodeBuilder(doc, element);
457 } else {
458 builder = new NodeBuilder(doc, doc.createDocumentFragment());
459 }
460 // the NodeBuilder constructor has already saved the original
461 // value of the digester's custom content handler (expected to
462 // be null, but we save it just in case). So now we just
463 // need to tell the digester to forward events to the builder.
464 getDigester().setCustomContentHandler(builder);
465 }
466
467
468 /**
469 * Pop the Node off the top of the stack.
470 */
471 @Override
472 public void end() throws Exception {
473
474 digester.pop();
475
476 }
477
478
479 }