001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.util.xml;
016
017 import java.io.IOException;
018 import java.io.InputStream;
019 import java.net.URL;
020 import java.util.ArrayList;
021 import java.util.HashMap;
022 import java.util.List;
023 import java.util.Map;
024
025 import javax.xml.parsers.ParserConfigurationException;
026 import javax.xml.parsers.SAXParser;
027 import javax.xml.parsers.SAXParserFactory;
028
029 import org.apache.commons.logging.Log;
030 import org.apache.commons.logging.LogFactory;
031 import org.apache.hivemind.ApplicationRuntimeException;
032 import org.apache.hivemind.HiveMind;
033 import org.apache.hivemind.Location;
034 import org.apache.hivemind.Resource;
035 import org.apache.hivemind.impl.LocationImpl;
036 import org.apache.tapestry.Tapestry;
037 import org.apache.tapestry.util.RegexpMatcher;
038 import org.xml.sax.Attributes;
039 import org.xml.sax.InputSource;
040 import org.xml.sax.Locator;
041 import org.xml.sax.SAXException;
042 import org.xml.sax.SAXParseException;
043 import org.xml.sax.helpers.DefaultHandler;
044
045 /**
046 * A simplified version of {@link org.apache.commons.digester.Digester}. This version is without as
047 * many bells and whistles but has some key features needed when parsing a document (rather than a
048 * configuration file): <br>
049 * <ul>
050 * <li>Notifications for each bit of text
051 * </ul>
052 * <li>Tracking of exact location within the document.</li>
053 * </ul>
054 * <p>
055 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
056 * coding), in that there's a one-to-one relationship between an element and a rule.
057 * <p>
058 * Based on SAX2.
059 *
060 * @author Howard Lewis Ship
061 * @since 3.0
062 */
063
064 public class RuleDirectedParser extends DefaultHandler
065 {
066 private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
067
068 private Resource _documentLocation;
069
070 private List _ruleStack = new ArrayList();
071
072 private List _objectStack = new ArrayList();
073
074 private Object _documentObject;
075
076 private Locator _locator;
077
078 private int _line = -1;
079
080 private int _column = -1;
081
082 private Location _location;
083
084 private static SAXParserFactory _parserFactory;
085
086 private SAXParser _parser;
087
088 private RegexpMatcher _matcher;
089
090 private String _uri;
091
092 private String _localName;
093
094 private String _qName;
095
096 /**
097 * Map of {@link IRule}keyed on the local name of the element.
098 */
099 private Map _ruleMap = new HashMap();
100
101 /**
102 * Used to accumlate content provided by
103 * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
104 */
105
106 private StringBuffer _contentBuffer = new StringBuffer();
107
108 /**
109 * Map of paths to external entities (such as the DTD) keyed on public id.
110 */
111
112 private Map _entities = new HashMap();
113
114 public Object parse(Resource documentLocation)
115 {
116 if (LOG.isDebugEnabled())
117 LOG.debug("Parsing: " + documentLocation);
118
119 try
120 {
121 _documentLocation = documentLocation;
122
123 URL url = documentLocation.getResourceURL();
124
125 if (url == null)
126 throw new DocumentParseException(Tapestry.format(
127 "RuleDrivenParser.resource-missing",
128 documentLocation), documentLocation);
129
130 return parse(url);
131 }
132 finally
133 {
134 _documentLocation = null;
135 _ruleStack.clear();
136 _objectStack.clear();
137 _documentObject = null;
138
139 _uri = null;
140 _localName = null;
141 _qName = null;
142
143 _line = -1;
144 _column = -1;
145 _location = null;
146 _locator = null;
147
148 _contentBuffer.setLength(0);
149 }
150 }
151
152 protected Object parse(URL url)
153 {
154 if (_parser == null)
155 _parser = constructParser();
156
157 InputStream stream = null;
158
159 try
160 {
161 stream = url.openStream();
162 }
163 catch (IOException ex)
164 {
165 throw new DocumentParseException(Tapestry.format(
166 "RuleDrivenParser.unable-to-open-resource",
167 url), _documentLocation, ex);
168 }
169
170 InputSource source = new InputSource(stream);
171
172 try
173 {
174 _parser.parse(source, this);
175
176 stream.close();
177 }
178 catch (Exception ex)
179 {
180 throw new DocumentParseException(Tapestry.format(
181 "RuleDrivenParser.parse-error",
182 url,
183 ex.getMessage()), getLocation(), ex);
184 }
185
186 if (LOG.isDebugEnabled())
187 LOG.debug("Document parsed as: " + _documentObject);
188
189 return _documentObject;
190 }
191
192 /**
193 * Returns an {@link ILocation}representing the current position within the document (depending
194 * on the parser, this may be accurate to column number level).
195 */
196
197 public Location getLocation()
198 {
199 if (_locator == null)
200 return null;
201
202 int line = _locator.getLineNumber();
203 int column = _locator.getColumnNumber();
204
205 if (_line != line || _column != column)
206 {
207 _location = null;
208 _line = line;
209 _column = column;
210 }
211
212 if (_location == null)
213 _location = new LocationImpl(_documentLocation, _line, _column);
214
215 return _location;
216 }
217
218 /**
219 * Pushes an object onto the object stack. The first object pushed is the "document object", the
220 * root object returned by the parse.
221 */
222 public void push(Object object)
223 {
224 if (_documentObject == null)
225 _documentObject = object;
226
227 push(_objectStack, object, "object stack");
228 }
229
230 /**
231 * Returns the top object on the object stack.
232 */
233 public Object peek()
234 {
235 return peek(_objectStack, 0);
236 }
237
238 /**
239 * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
240 * the next-to-top object, etc.
241 */
242
243 public Object peek(int depth)
244 {
245 return peek(_objectStack, depth);
246 }
247
248 /**
249 * Removes and returns the top object on the object stack.
250 */
251 public Object pop()
252 {
253 return pop(_objectStack, "object stack");
254 }
255
256 private Object pop(List list, String name)
257 {
258 Object result = list.remove(list.size() - 1);
259
260 if (LOG.isDebugEnabled())
261 LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
262
263 return result;
264 }
265
266 private Object peek(List list, int depth)
267 {
268 return list.get(list.size() - 1 - depth);
269 }
270
271 private void push(List list, Object object, String name)
272 {
273 if (LOG.isDebugEnabled())
274 LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
275
276 list.add(object);
277 }
278
279 /**
280 * Pushes a new rule onto the rule stack.
281 */
282
283 protected void pushRule(IRule rule)
284 {
285 push(_ruleStack, rule, "rule stack");
286 }
287
288 /**
289 * Returns the top rule on the stack.
290 */
291
292 protected IRule peekRule()
293 {
294 return (IRule) peek(_ruleStack, 0);
295 }
296
297 protected IRule popRule()
298 {
299 return (IRule) pop(_ruleStack, "rule stack");
300 }
301
302 public void addRule(String localElementName, IRule rule)
303 {
304 _ruleMap.put(localElementName, rule);
305 }
306
307 /**
308 * Registers a public id and corresponding input source. Generally, the source is a wrapper
309 * around an input stream to a package resource.
310 *
311 * @param publicId
312 * the public identifier to be registerred, generally the publicId of a DTD related
313 * to the document being parsed
314 * @param entityPath
315 * the resource path of the entity, typically a DTD file. Relative files names are
316 * expected to be stored in the same package as the class file, otherwise a leading
317 * slash is an absolute pathname within the classpath.
318 */
319
320 public void registerEntity(String publicId, String entityPath)
321 {
322 if (LOG.isDebugEnabled())
323 LOG.debug("Registering " + publicId + " as " + entityPath);
324
325 if (_entities == null)
326 _entities = new HashMap();
327
328 _entities.put(publicId, entityPath);
329 }
330
331 protected IRule selectRule(String localName, Attributes attributes)
332 {
333 IRule rule = (IRule) _ruleMap.get(localName);
334
335 if (rule == null)
336 throw new DocumentParseException(Tapestry.format(
337 "RuleDrivenParser.no-rule-for-element",
338 localName), getLocation());
339
340 return rule;
341 }
342
343 /**
344 * Uses the {@link Locator}to track the position in the document as a {@link ILocation}. This
345 * is invoked once (before the initial element is parsed) and the Locator is retained and
346 * queried as to the current file location.
347 *
348 * @see #getLocation()
349 */
350 public void setDocumentLocator(Locator locator)
351 {
352 _locator = locator;
353 }
354
355 /**
356 * Accumulates the content in a buffer; the concatinated content is provided to the top rule
357 * just before any start or end tag.
358 */
359 public void characters(char[] ch, int start, int length) throws SAXException
360 {
361 _contentBuffer.append(ch, start, length);
362 }
363
364 /**
365 * Pops the top rule off the stack and invokes {@link IRule#endElementt(RuleDirectedParser)}.
366 */
367 public void endElement(String uri, String localName, String qName) throws SAXException
368 {
369 fireContentRule();
370
371 _uri = uri;
372 _localName = localName;
373 _qName = qName;
374
375 popRule().endElement(this);
376 }
377
378 /**
379 * Ignorable content is ignored.
380 */
381 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
382 {
383 }
384
385 /**
386 * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
387 * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
388 */
389 public void startElement(String uri, String localName, String qName, Attributes attributes)
390 throws SAXException
391 {
392 fireContentRule();
393
394 _uri = uri;
395 _localName = localName;
396 _qName = qName;
397
398 String name = extractName(uri, localName, qName);
399
400 IRule newRule = selectRule(name, attributes);
401
402 pushRule(newRule);
403
404 newRule.startElement(this, attributes);
405 }
406
407 private String extractName(String uri, String localName, String qName)
408 {
409 return HiveMind.isBlank(localName) ? qName : localName;
410 }
411
412 /**
413 * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
414 * parser.
415 */
416 protected synchronized SAXParser constructParser()
417 {
418 if (_parserFactory == null)
419 {
420 _parserFactory = SAXParserFactory.newInstance();
421 configureParserFactory(_parserFactory);
422 }
423
424 try
425 {
426 return _parserFactory.newSAXParser();
427 }
428 catch (SAXException ex)
429 {
430 throw new ApplicationRuntimeException(ex);
431 }
432 catch (ParserConfigurationException ex)
433 {
434 throw new ApplicationRuntimeException(ex);
435 }
436
437 }
438
439 /**
440 * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
441 * invoked. The default implementation sets validating to true and namespaceAware to false,
442 */
443
444 protected void configureParserFactory(SAXParserFactory factory)
445 {
446 factory.setValidating(true);
447 factory.setNamespaceAware(false);
448 }
449
450 /**
451 * Throws the exception.
452 */
453 public void error(SAXParseException ex) throws SAXException
454 {
455 fatalError(ex);
456 }
457
458 /**
459 * Throws the exception.
460 */
461 public void fatalError(SAXParseException ex) throws SAXException
462 {
463 // Sometimes, a bad parse "corrupts" a parser so that it doesn't
464 // work properly for future parses (of valid documents),
465 // so discard it here.
466
467 _parser = null;
468
469 throw ex;
470 }
471
472 /**
473 * Throws the exception.
474 */
475 public void warning(SAXParseException ex) throws SAXException
476 {
477 fatalError(ex);
478 }
479
480 public InputSource resolveEntity(String publicId, String systemId) throws SAXException
481 {
482 String entityPath = null;
483
484 if (LOG.isDebugEnabled())
485 LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
486 + systemId);
487
488 if (_entities != null)
489 entityPath = (String) _entities.get(publicId);
490
491 if (entityPath == null)
492 {
493 if (LOG.isDebugEnabled())
494 LOG.debug("Entity not found, using " + systemId);
495
496 return null;
497 }
498
499 InputStream stream = getClass().getResourceAsStream(entityPath);
500
501 InputSource result = new InputSource(stream);
502
503 if (result != null && LOG.isDebugEnabled())
504 LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
505
506 return result;
507 }
508
509 /**
510 * Validates that the input value matches against the specified Perl5 pattern. If valid, the
511 * method simply returns. If not a match, then an error message is generated (using the errorKey
512 * and the input value) and a {@link InvalidStringException}is thrown.
513 */
514
515 public void validate(String value, String pattern, String errorKey)
516 throws DocumentParseException
517 {
518 if (_matcher == null)
519 _matcher = new RegexpMatcher();
520
521 if (_matcher.matches(pattern, value))
522 return;
523
524 throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
525 }
526
527 public Resource getDocumentLocation()
528 {
529 return _documentLocation;
530 }
531
532 /**
533 * Returns the localName for the current element.
534 *
535 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
536 * java.lang.String, org.xml.sax.Attributes)
537 */
538 public String getLocalName()
539 {
540 return _localName;
541 }
542
543 /**
544 * Returns the qualified name for the current element.
545 *
546 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
547 * java.lang.String, org.xml.sax.Attributes)
548 */
549 public String getQName()
550 {
551 return _qName;
552 }
553
554 /**
555 * Returns the URI for the current element.
556 *
557 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
558 * java.lang.String, org.xml.sax.Attributes)
559 */
560 public String getUri()
561 {
562 return _uri;
563 }
564
565 private void fireContentRule()
566 {
567 String content = _contentBuffer.toString();
568 _contentBuffer.setLength(0);
569
570 if (!_ruleStack.isEmpty())
571 peekRule().content(this, content);
572 }
573
574 }