001 /*
002 $Id: XmlParser.java 4132 2006-10-18 08:24:58Z paulk $
003
004 Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
005
006 Redistribution and use of this software and associated documentation
007 ("Software"), with or without modification, are permitted provided
008 that the following conditions are met:
009
010 1. Redistributions of source code must retain copyright
011 statements and notices. Redistributions must also contain a
012 copy of this document.
013
014 2. Redistributions in binary form must reproduce the
015 above copyright notice, this list of conditions and the
016 following disclaimer in the documentation and/or other
017 materials provided with the distribution.
018
019 3. The name "groovy" must not be used to endorse or promote
020 products derived from this Software without prior written
021 permission of The Codehaus. For written permission,
022 please contact info@codehaus.org.
023
024 4. Products derived from this Software may not be called "groovy"
025 nor may "groovy" appear in their names without prior written
026 permission of The Codehaus. "groovy" is a registered
027 trademark of The Codehaus.
028
029 5. Due credit should be given to The Codehaus -
030 http://groovy.codehaus.org/
031
032 THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
033 ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
034 NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
035 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
036 THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
037 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
039 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
040 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
041 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
042 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
043 OF THE POSSIBILITY OF SUCH DAMAGE.
044
045 */
046 package groovy.util;
047
048 import groovy.xml.QName;
049 import groovy.xml.FactorySupport;
050
051 import java.io.File;
052 import java.io.FileInputStream;
053 import java.io.IOException;
054 import java.io.InputStream;
055 import java.io.Reader;
056 import java.io.StringReader;
057 import java.util.ArrayList;
058 import java.util.HashMap;
059 import java.util.List;
060 import java.util.Map;
061
062 import javax.xml.parsers.ParserConfigurationException;
063 import javax.xml.parsers.SAXParser;
064 import javax.xml.parsers.SAXParserFactory;
065
066 import org.xml.sax.*;
067
068 /**
069 * A helper class for parsing XML into a tree of Node instances for
070 * a simple way of processing XML. This parser does not preserve the
071 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
072 * This parser ignores comments and processing instructions and converts the
073 * XML into a Node for each element in the XML with attributes
074 * and child Nodes and Strings. This simple model is sufficient for
075 * most simple use cases of processing XML.
076 *
077 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
078 * @version $Revision: 4132 $
079 */
080 public class XmlParser implements ContentHandler {
081
082 private StringBuffer bodyText = new StringBuffer();
083 private List stack = new ArrayList();
084 private Locator locator;
085 private XMLReader reader;
086 private Node parent;
087 private boolean trimWhitespace = true;
088
089 public XmlParser() throws ParserConfigurationException, SAXException {
090 this(false, true);
091 }
092
093 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
094 SAXParserFactory factory = FactorySupport.createSaxParserFactory();
095 factory.setNamespaceAware(namespaceAware);
096 factory.setValidating(validating);
097 reader = factory.newSAXParser().getXMLReader();
098 }
099
100 public XmlParser(XMLReader reader) {
101 this.reader = reader;
102 }
103
104 public XmlParser(SAXParser parser) throws SAXException {
105 reader = parser.getXMLReader();
106 }
107
108
109 /**
110 * Parses the content of the given file as XML turning it into a tree
111 * of Nodes
112 */
113 public Node parse(File file) throws IOException, SAXException {
114 InputSource input = new InputSource(new FileInputStream(file));
115 input.setSystemId("file://" + file.getAbsolutePath());
116 getXMLReader().parse(input);
117 return parent;
118
119 }
120
121 /**
122 * Parse the content of the specified input source into a tree of Nodes.
123 */
124 public Node parse(InputSource input) throws IOException, SAXException {
125 getXMLReader().parse(input);
126 return parent;
127 }
128
129 /**
130 * Parse the content of the specified input stream into a tree of Nodes.
131 * Note that using this method will not provide the parser with any URI
132 * for which to find DTDs etc
133 */
134 public Node parse(InputStream input) throws IOException, SAXException {
135 InputSource is = new InputSource(input);
136 getXMLReader().parse(is);
137 return parent;
138 }
139
140 /**
141 * Parse the content of the specified reader into a tree of Nodes.
142 * Note that using this method will not provide the parser with any URI
143 * for which to find DTDs etc
144 */
145 public Node parse(Reader in) throws IOException, SAXException {
146 InputSource is = new InputSource(in);
147 getXMLReader().parse(is);
148 return parent;
149 }
150
151 /**
152 * Parse the content of the specified URI into a tree of Nodes
153 */
154 public Node parse(String uri) throws IOException, SAXException {
155 InputSource is = new InputSource(uri);
156 getXMLReader().parse(is);
157 return parent;
158 }
159
160 /**
161 * A helper method to parse the given text as XML
162 *
163 * @param text
164 */
165 public Node parseText(String text) throws IOException, SAXException {
166 return parse(new StringReader(text));
167 }
168 // Delegated XMLReader methods
169 //------------------------------------------------------------------------
170
171 /* (non-Javadoc)
172 * @see org.xml.sax.XMLReader#getDTDHandler()
173 */
174 public DTDHandler getDTDHandler() {
175 return this.reader.getDTDHandler();
176 }
177
178 /* (non-Javadoc)
179 * @see org.xml.sax.XMLReader#getEntityResolver()
180 */
181 public EntityResolver getEntityResolver() {
182 return this.reader.getEntityResolver();
183 }
184
185 /* (non-Javadoc)
186 * @see org.xml.sax.XMLReader#getErrorHandler()
187 */
188 public ErrorHandler getErrorHandler() {
189 return this.reader.getErrorHandler();
190 }
191
192 /* (non-Javadoc)
193 * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
194 */
195 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
196 return this.reader.getFeature(uri);
197 }
198
199 /* (non-Javadoc)
200 * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
201 */
202 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
203 return this.reader.getProperty(uri);
204 }
205
206 /* (non-Javadoc)
207 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
208 */
209 public void setDTDHandler(final DTDHandler dtdHandler) {
210 this.reader.setDTDHandler(dtdHandler);
211 }
212
213 /* (non-Javadoc)
214 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
215 */
216 public void setEntityResolver(final EntityResolver entityResolver) {
217 this.reader.setEntityResolver(entityResolver);
218 }
219
220 /* (non-Javadoc)
221 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
222 */
223 public void setErrorHandler(final ErrorHandler errorHandler) {
224 this.reader.setErrorHandler(errorHandler);
225 }
226
227 /* (non-Javadoc)
228 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
229 */
230 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
231 this.reader.setFeature(uri, value);
232 }
233
234 /* (non-Javadoc)
235 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
236 */
237 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
238 this.reader.setProperty(uri, value);
239 }
240
241 // ContentHandler interface
242 //-------------------------------------------------------------------------
243 public void startDocument() throws SAXException {
244 parent = null;
245 }
246
247 public void endDocument() throws SAXException {
248 stack.clear();
249 }
250
251 public void startElement(String namespaceURI, String localName, String qName, Attributes list)
252 throws SAXException {
253 addTextToNode();
254
255 Object name = getElementName(namespaceURI, localName, qName);
256
257 int size = list.getLength();
258 Map attributes = new HashMap(size);
259 for (int i = 0; i < size; i++) {
260 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
261 String value = list.getValue(i);
262 attributes.put(attributeName, value);
263 }
264 parent = new Node(parent, name, attributes, new ArrayList());
265 stack.add(parent);
266 }
267
268 public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
269 addTextToNode();
270
271 if (!stack.isEmpty()) {
272 stack.remove(stack.size() - 1);
273 if (!stack.isEmpty()) {
274 parent = (Node) stack.get(stack.size() - 1);
275 }
276 }
277 }
278
279 public void characters(char buffer[], int start, int length) throws SAXException {
280 bodyText.append(buffer, start, length);
281 }
282
283 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
284 }
285
286 public void endPrefixMapping(String prefix) throws SAXException {
287 }
288
289 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
290 }
291
292 public void processingInstruction(String target, String data) throws SAXException {
293 }
294
295 public Locator getDocumentLocator() {
296 return locator;
297 }
298
299 public void setDocumentLocator(Locator locator) {
300 this.locator = locator;
301 }
302
303 public void skippedEntity(String name) throws SAXException {
304 }
305
306 // Implementation methods
307 //-------------------------------------------------------------------------
308 protected XMLReader getXMLReader() {
309 reader.setContentHandler(this);
310 return reader;
311 }
312
313 protected void addTextToNode() {
314 String text = bodyText.toString();
315 if (trimWhitespace) {
316 text = text.trim();
317 }
318 if (text.length() > 0) {
319 parent.children().add(text);
320 }
321 bodyText = new StringBuffer();
322 }
323
324 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
325 String name = localName;
326 if ((name == null) || (name.length() < 1)) {
327 name = qName;
328 }
329 if (namespaceURI == null || namespaceURI.length() <= 0) {
330 return name;
331 }
332 else {
333 return new QName(namespaceURI, name, qName);
334 }
335 }
336 }