001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.parse;
016
017 import java.util.ArrayList;
018 import java.util.Collections;
019 import java.util.HashMap;
020 import java.util.Iterator;
021 import java.util.List;
022 import java.util.Map;
023
024 import org.apache.hivemind.ApplicationRuntimeException;
025 import org.apache.hivemind.Location;
026 import org.apache.hivemind.Resource;
027 import org.apache.hivemind.impl.LocationImpl;
028 import org.apache.oro.text.regex.MalformedPatternException;
029 import org.apache.oro.text.regex.MatchResult;
030 import org.apache.oro.text.regex.Pattern;
031 import org.apache.oro.text.regex.PatternMatcher;
032 import org.apache.oro.text.regex.Perl5Compiler;
033 import org.apache.oro.text.regex.Perl5Matcher;
034 import org.apache.tapestry.util.IdAllocator;
035
036 /**
037 * Parses Tapestry templates, breaking them into a series of
038 * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
039 * template", there is no real requirement that the template be HTML. This parser can handle any
040 * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
041 * HTML reasonably.
042 * <p>
043 * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
044 * <p>
045 * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
046 * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
047 * or end the tag with "<code>/></code>").
048 * <p>
049 * Generally, the id specified in the template is matched against a component defined in the
050 * specification. However, implicit components are also possible. The jwcid attribute uses the
051 * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
052 * prefix. Such a component is anonymous (but is given a unique id).
053 * <p>
054 * (The unique ids assigned start with a dollar sign, which is normally not allowed for
055 * component ids ... this helps to make them stand out and assures that they do not conflict
056 * with user-defined component ids. These ids tend to propagate into URLs and become HTML
057 * element names and even JavaScript variable names ... the dollar sign is acceptable in these
058 * contexts as well).
059 * <p>
060 * Implicit components may also be given a name using the syntax "
061 * <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
062 * specification, but may still be accessed via
063 * {@link org.apache.tapestry.IComponent#getComponent(String)}.
064 * <p>
065 * Both defined and implicit components may have additional attributes defined, simply by
066 * including them in the template. They set formal or informal parameters of the component to
067 * static strings.
068 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
069 * false, will cause such attributes to be simply ignored. For defined components, conflicting
070 * values defined in the template are ignored.
071 * <p>
072 * Attributes in component tags will become formal and informal parameters of the
073 * corresponding component. Most attributes will be
074 * <p>
075 * The parser removes the body of some tags (when the corresponding component doesn't
076 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
077 * allows portions of the template to be completely removed.
078 * <p>
079 * The parser does a pretty thorough lexical analysis of the template, and reports a great
080 * number of errors, including improper nesting of tags.
081 * <p>
082 * The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
083 * form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
084 * into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
085 * attribute ... if the value is <code>true</code>, then the localized value is sent to the
086 * client without filtering, which is appropriate if the value has any markup that should not
087 * be escaped.
088 * @author Howard Lewis Ship, Geoff Longman
089 */
090
091 public class TemplateParser implements ITemplateParser
092 {
093 /**
094 * A "magic" component id that causes the tag with the id and its entire body to be ignored
095 * during parsing.
096 */
097
098 private static final String REMOVE_ID = "$remove$";
099
100 /**
101 * A "magic" component id that causes the tag to represent the true content of the template. Any
102 * content prior to the tag is discarded, and any content after the tag is ignored. The tag
103 * itself is not included.
104 */
105
106 private static final String CONTENT_ID = "$content$";
107
108 /**
109 * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used as
110 * an invisible localization.
111 *
112 * @since 2.0.4
113 */
114
115 public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
116
117 /**
118 * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
119 * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
120 * equal "true" (caselessly).
121 *
122 * @since 2.3
123 */
124
125 public static final String RAW_ATTRIBUTE_NAME = "raw";
126
127 /**
128 * Attribute name used to identify components.
129 *
130 * @since 4.0
131 */
132
133 private String _componentAttributeName;
134
135 private static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
136
137 /**
138 * Pattern used to recognize ordinary components (defined in the specification).
139 *
140 * @since 3.0
141 */
142
143 public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
144
145 /**
146 * Pattern used to recognize implicit components (whose type is defined in the template).
147 * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
148 * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
149 * type, which may (as of 4.0) have slashes to delineate folders containing the component.
150 *
151 * @since 3.0
152 */
153
154 public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
155 + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
156 + PROPERTY_NAME_PATTERN + "))$";
157
158 private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
159
160 private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
161
162 private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
163
164 private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
165
166 private Pattern _simpleIdPattern;
167
168 private Pattern _implicitIdPattern;
169
170 private PatternMatcher _patternMatcher;
171
172 private IdAllocator _idAllocator = new IdAllocator();
173
174 private ITemplateParserDelegate _delegate;
175
176 /**
177 * Identifies the template being parsed; used with error messages.
178 */
179
180 private Resource _resourceLocation;
181
182 /**
183 * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
184 */
185
186 private Location _templateLocation;
187
188 /**
189 * Location within the resource for the current line.
190 */
191
192 private Location _currentLocation;
193
194 /**
195 * Local reference to the template data that is to be parsed.
196 */
197
198 private char[] _templateData;
199
200 /**
201 * List of Tag
202 */
203
204 private List _stack = new ArrayList();
205
206 private static class Tag
207 {
208 // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
209 String _tagName;
210
211 // If true, the tag is a placeholder for a dynamic element
212 boolean _component;
213
214 // If true, the body of the tag is being ignored, and the
215 // ignore flag is cleared when the close tag is reached
216 boolean _ignoringBody;
217
218 // If true, then the entire tag (and its body) is being ignored
219 boolean _removeTag;
220
221 // If true, then the tag must have a balanced closing tag.
222 // This is always true for components.
223 boolean _mustBalance;
224
225 // The line on which the start tag exists
226 int _line;
227
228 // If true, then the parse ends when the closing tag is found.
229 boolean _content;
230
231 Tag(String tagName, int line)
232 {
233 _tagName = tagName;
234 _line = line;
235 }
236
237 boolean match(String matchTagName)
238 {
239 return _tagName.equalsIgnoreCase(matchTagName);
240 }
241 }
242
243 /**
244 * List of {@link TemplateToken}, this forms the ultimate response.
245 */
246
247 private List _tokens = new ArrayList();
248
249 /**
250 * The location of the 'cursor' within the template data. The advance() method moves this
251 * forward.
252 */
253
254 private int _cursor;
255
256 /**
257 * The start of the current block of static text, or -1 if no block is active.
258 */
259
260 private int _blockStart;
261
262 /**
263 * The current line number; tracked by advance(). Starts at 1.
264 */
265
266 private int _line;
267
268 /**
269 * Set to true when the body of a tag is being ignored. This is typically used to skip over the
270 * body of a tag when its corresponding component doesn't allow a body, or when the special jwcid
271 * of $remove$ is used.
272 */
273
274 private boolean _ignoring;
275
276 /**
277 * A {@link Map} of {@link String}s, used to store attributes collected while parsing a tag.
278 */
279
280 private Map _attributes = new HashMap();
281
282 /**
283 * A factory used to create template tokens.
284 */
285
286 private TemplateTokenFactory _factory;
287
288 public TemplateParser()
289 {
290 Perl5Compiler compiler = new Perl5Compiler();
291
292 try
293 {
294 _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
295 _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
296 }
297 catch (MalformedPatternException ex)
298 {
299 throw new ApplicationRuntimeException(ex);
300 }
301
302 _patternMatcher = new Perl5Matcher();
303 }
304
305 /**
306 * Parses the template data into an array of {@link TemplateToken}s.
307 * <p>
308 * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
309 * thread accesses it.
310 *
311 * @param templateData
312 * the HTML template to parse. Some tokens will hold a reference to this array.
313 * @param delegate
314 * object that "knows" about defined components
315 * @param resourceLocation
316 * a description of where the template originated from, used with error messages.
317 */
318
319 public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate,
320 Resource resourceLocation) throws TemplateParseException
321 {
322 try
323 {
324 beforeParse(templateData, delegate, resourceLocation);
325
326 parse();
327
328 return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
329 }
330 finally
331 {
332 afterParse();
333 }
334 }
335
336 /**
337 * perform default initialization of the parser.
338 */
339
340 protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate,
341 Resource resourceLocation)
342 {
343 _templateData = templateData;
344 _resourceLocation = resourceLocation;
345 _templateLocation = new LocationImpl(resourceLocation);
346 _delegate = delegate;
347 _ignoring = false;
348 _line = 1;
349 _componentAttributeName = delegate.getComponentAttributeName();
350 }
351
352 /**
353 * Perform default cleanup after parsing completes.
354 */
355
356 protected void afterParse()
357 {
358 _delegate = null;
359 _templateData = null;
360 _resourceLocation = null;
361 _templateLocation = null;
362 _currentLocation = null;
363 _stack.clear();
364 _tokens.clear();
365 _attributes.clear();
366 _idAllocator.clear();
367 }
368
369 /**
370 * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
371 * is reported.
372 * <p>
373 * The default implementation simply throws an exception that contains the message and location
374 * parameters.
375 * <p>
376 * Subclasses may override but <b>must </b> ensure they throw the required exception.
377 *
378 * @param message
379 * @param location
380 * @param line
381 * ignored by the default impl
382 * @param cursor
383 * ignored by the default impl
384 * @throws TemplateParseException
385 * always thrown in order to terminate the parse.
386 */
387
388 protected void templateParseProblem(String message, Location location, int line, int cursor)
389 throws TemplateParseException
390 {
391 throw new TemplateParseException(message, location);
392 }
393
394 /**
395 * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must
396 * </b> stop when a problem is reported.
397 * <p>
398 * The default implementation simply rethrows the exception.
399 * <p>
400 * Subclasses may override but <b>must </b> ensure they rethrow the exception.
401 *
402 * @param exception
403 * @param line
404 * ignored by the default impl
405 * @param cursor
406 * ignored by the default impl
407 * @throws ApplicationRuntimeException
408 * always rethrown in order to terminate the parse.
409 */
410
411 protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
412 throws ApplicationRuntimeException
413 {
414 throw exception;
415 }
416
417 /**
418 * Give subclasses access to the parse results.
419 */
420 protected List getTokens()
421 {
422 if (_tokens == null)
423 return Collections.EMPTY_LIST;
424
425 return _tokens;
426 }
427
428 /**
429 * Checks to see if the next few characters match a given pattern.
430 */
431
432 private boolean lookahead(char[] match)
433 {
434 try
435 {
436 for (int i = 0; i < match.length; i++)
437 {
438 if (_templateData[_cursor + i] != match[i])
439 return false;
440 }
441
442 // Every character matched.
443
444 return true;
445 }
446 catch (IndexOutOfBoundsException ex)
447 {
448 return false;
449 }
450 }
451
452 private static final char[] COMMENT_START = new char[]
453 { '<', '!', '-', '-' };
454
455 private static final char[] COMMENT_END = new char[]
456 { '-', '-', '>' };
457
458 private static final char[] CLOSE_TAG = new char[]
459 { '<', '/' };
460
461 protected void parse() throws TemplateParseException
462 {
463 _cursor = 0;
464 _blockStart = -1;
465 int length = _templateData.length;
466
467 while (_cursor < length)
468 {
469 if (_templateData[_cursor] != '<')
470 {
471 if (_blockStart < 0 && !_ignoring)
472 _blockStart = _cursor;
473
474 advance();
475 continue;
476 }
477
478 // OK, start of something.
479
480 if (lookahead(CLOSE_TAG))
481 {
482 closeTag();
483 continue;
484 }
485
486 if (lookahead(COMMENT_START))
487 {
488 skipComment();
489 continue;
490 }
491
492 // The start of some tag.
493
494 startTag();
495 }
496
497 // Usually there's some text at the end of the template (after the last closing tag) that
498 // should
499 // be added. Often the last few tags are static tags so we definately
500 // need to end the text block.
501
502 addTextToken(_templateData.length - 1);
503 }
504
505 /**
506 * Advance forward in the document until the end of the comment is reached. In addition, skip
507 * any whitespace following the comment.
508 */
509
510 private void skipComment() throws TemplateParseException
511 {
512 int length = _templateData.length;
513 int startLine = _line;
514
515 if (_blockStart < 0 && !_ignoring)
516 _blockStart = _cursor;
517
518 while (true)
519 {
520 if (_cursor >= length)
521 templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
522 _resourceLocation, startLine), startLine, _cursor);
523
524 if (lookahead(COMMENT_END))
525 break;
526
527 // Not the end of the comment, advance over it.
528
529 advance();
530 }
531
532 _cursor += COMMENT_END.length;
533 advanceOverWhitespace();
534 }
535
536 private void addTextToken(int end)
537 {
538 // No active block to add to.
539
540 if (_blockStart < 0)
541 return;
542
543 if (_blockStart <= end)
544 {
545 // This seems odd, shouldn't the location be the current location? I guess
546 // no errors are ever reported for a text token.
547
548 TemplateToken token = _factory.createTextToken(
549 _templateData,
550 _blockStart,
551 end,
552 _templateLocation);
553
554 _tokens.add(token);
555 }
556
557 _blockStart = -1;
558 }
559
560 private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
561
562 private static final int COLLECT_ATTRIBUTE_NAME = 1;
563
564 private static final int ADVANCE_PAST_EQUALS = 2;
565
566 private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
567
568 private static final int COLLECT_QUOTED_VALUE = 4;
569
570 private static final int COLLECT_UNQUOTED_VALUE = 5;
571
572 private void startTag() throws TemplateParseException
573 {
574 int cursorStart = _cursor;
575 int length = _templateData.length;
576 String tagName = null;
577 boolean endOfTag = false;
578 boolean emptyTag = false;
579 int startLine = _line;
580 Location startLocation = new LocationImpl(_resourceLocation, startLine);
581
582 tagBeginEvent(startLine, _cursor);
583
584 advance();
585
586 // Collect the element type
587
588 while (_cursor < length)
589 {
590 char ch = _templateData[_cursor];
591
592 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
593 {
594 tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
595
596 break;
597 }
598
599 advance();
600 }
601
602 String attributeName = null;
603 int attributeNameStart = -1;
604 int attributeValueStart = -1;
605 int state = WAIT_FOR_ATTRIBUTE_NAME;
606 char quoteChar = 0;
607
608 _attributes.clear();
609
610 // Collect each attribute
611
612 while (!endOfTag)
613 {
614 if (_cursor >= length)
615 {
616 String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
617 : ParseMessages.unclosedTag(tagName, startLine);
618
619 templateParseProblem(message, startLocation, startLine, cursorStart);
620 }
621
622 char ch = _templateData[_cursor];
623
624 switch (state)
625 {
626 case WAIT_FOR_ATTRIBUTE_NAME:
627
628 // Ignore whitespace before the next attribute name, while
629 // looking for the end of the current tag.
630
631 if (ch == '/')
632 {
633 emptyTag = true;
634 advance();
635 break;
636 }
637
638 if (ch == '>')
639 {
640 endOfTag = true;
641 break;
642 }
643
644 if (Character.isWhitespace(ch))
645 {
646 advance();
647 break;
648 }
649
650 // Found non-whitespace, assume its the attribute name.
651 // Note: could use a check here for non-alpha.
652
653 attributeNameStart = _cursor;
654 state = COLLECT_ATTRIBUTE_NAME;
655 advance();
656 break;
657
658 case COLLECT_ATTRIBUTE_NAME:
659
660 // Looking for end of attribute name.
661
662 if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
663 {
664 attributeName = new String(_templateData, attributeNameStart, _cursor
665 - attributeNameStart);
666
667 state = ADVANCE_PAST_EQUALS;
668 break;
669 }
670
671 // Part of the attribute name
672
673 advance();
674 break;
675
676 case ADVANCE_PAST_EQUALS:
677
678 // Looking for the '=' sign. May hit the end of the tag, or (for bare
679 // attributes),
680 // the next attribute name.
681
682 if (ch == '/' || ch == '>')
683 {
684 // A bare attribute, which is not interesting to
685 // us.
686
687 state = WAIT_FOR_ATTRIBUTE_NAME;
688 break;
689 }
690
691 if (Character.isWhitespace(ch))
692 {
693 advance();
694 break;
695 }
696
697 if (ch == '=')
698 {
699 state = WAIT_FOR_ATTRIBUTE_VALUE;
700 quoteChar = 0;
701 attributeValueStart = -1;
702 advance();
703 break;
704 }
705
706 // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
707 // We aren't interested in those (we're just looking for the id or jwcid
708 // attribute).
709
710 state = WAIT_FOR_ATTRIBUTE_NAME;
711 break;
712
713 case WAIT_FOR_ATTRIBUTE_VALUE:
714
715 if (ch == '/' || ch == '>')
716 templateParseProblem(ParseMessages.missingAttributeValue(
717 tagName,
718 _line,
719 attributeName), getCurrentLocation(), _line, _cursor);
720
721 // Ignore whitespace between '=' and the attribute value. Also, look
722 // for initial quote.
723
724 if (Character.isWhitespace(ch))
725 {
726 advance();
727 break;
728 }
729
730 if (ch == '\'' || ch == '"')
731 {
732 quoteChar = ch;
733
734 state = COLLECT_QUOTED_VALUE;
735 advance();
736 attributeValueStart = _cursor;
737 attributeBeginEvent(attributeName, _line, attributeValueStart);
738 break;
739 }
740
741 // Not whitespace or quote, must be start of unquoted attribute.
742
743 state = COLLECT_UNQUOTED_VALUE;
744 attributeValueStart = _cursor;
745 attributeBeginEvent(attributeName, _line, attributeValueStart);
746 break;
747
748 case COLLECT_QUOTED_VALUE:
749
750 // Start collecting the quoted attribute value. Stop at the matching quote
751 // character,
752 // unless bare, in which case, stop at the next whitespace.
753
754 if (ch == quoteChar)
755 {
756 String attributeValue = new String(_templateData, attributeValueStart,
757 _cursor - attributeValueStart);
758
759 attributeEndEvent(_cursor);
760
761 addAttributeIfUnique(tagName, attributeName, attributeValue);
762
763 // Advance over the quote.
764 advance();
765 state = WAIT_FOR_ATTRIBUTE_NAME;
766 break;
767 }
768
769 advance();
770 break;
771
772 case COLLECT_UNQUOTED_VALUE:
773
774 // An unquoted attribute value ends with whitespace
775 // or the end of the enclosing tag.
776
777 if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
778 {
779 String attributeValue = new String(_templateData, attributeValueStart,
780 _cursor - attributeValueStart);
781
782 attributeEndEvent(_cursor);
783 addAttributeIfUnique(tagName, attributeName, attributeValue);
784
785 state = WAIT_FOR_ATTRIBUTE_NAME;
786 break;
787 }
788
789 advance();
790 break;
791 }
792 }
793
794 tagEndEvent(_cursor);
795
796 // Check for invisible localizations
797
798 String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
799 String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
800
801 if (localizationKey != null && tagName.equalsIgnoreCase("span") && jwcId == null)
802 {
803 if (_ignoring)
804 templateParseProblem(
805 ParseMessages.componentMayNotBeIgnored(tagName, startLine),
806 startLocation,
807 startLine,
808 cursorStart);
809
810 // If the tag isn't empty, then create a Tag instance to ignore the
811 // body of the tag.
812
813 if (!emptyTag)
814 {
815 Tag tag = new Tag(tagName, startLine);
816
817 tag._component = false;
818 tag._removeTag = true;
819 tag._ignoringBody = true;
820 tag._mustBalance = true;
821
822 _stack.add(tag);
823
824 // Start ignoring content until the close tag.
825
826 _ignoring = true;
827 }
828 else
829 {
830 // Cursor is at the closing carat, advance over it.
831 advance();
832 // TAPESTRY-359: *don't* skip whitespace
833 }
834
835 // End any open block.
836
837 addTextToken(cursorStart - 1);
838
839 boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
840
841 Map attributes = filter(_attributes, new String[]
842 { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
843
844 TemplateToken token = _factory.createLocalizationToken(
845 tagName,
846 localizationKey,
847 raw,
848 attributes,
849 startLocation);
850
851 _tokens.add(token);
852
853 return;
854 }
855
856 if (jwcId != null)
857 {
858 processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
859 return;
860 }
861
862 // A static tag (not a tag without a jwcid attribute).
863 // We need to record this so that we can match close tags later.
864
865 if (!emptyTag)
866 {
867 Tag tag = new Tag(tagName, startLine);
868 _stack.add(tag);
869 }
870
871 // If there wasn't an active block, then start one.
872
873 if (_blockStart < 0 && !_ignoring)
874 _blockStart = cursorStart;
875
876 advance();
877 }
878
879 /**
880 * @throws TemplateParseException
881 * @since 4.0
882 */
883
884 private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
885 throws TemplateParseException
886 {
887
888 if (_attributes.containsKey(attributeName))
889 templateParseProblem(
890 ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
891 getCurrentLocation(),
892 _line,
893 _cursor);
894
895 _attributes.put(attributeName, attributeValue);
896 }
897
898 /**
899 * Processes a tag that is the open tag for a component (but also handles the $remove$ and
900 * $content$ tags).
901 */
902
903 /**
904 * Notify that the beginning of a tag has been detected.
905 * <p>
906 * Default implementation does nothing.
907 */
908 protected void tagBeginEvent(int startLine, int cursorPosition)
909 {
910 }
911
912 /**
913 * Notify that the end of the current tag has been detected.
914 * <p>
915 * Default implementation does nothing.
916 */
917 protected void tagEndEvent(int cursorPosition)
918 {
919 }
920
921 /**
922 * Notify that the beginning of an attribute value has been detected.
923 * <p>
924 * Default implementation does nothing.
925 */
926 protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
927 {
928 }
929
930 /**
931 * Notify that the end of the current attribute value has been detected.
932 * <p>
933 * Default implementation does nothing.
934 */
935 protected void attributeEndEvent(int cursorPosition)
936 {
937 }
938
939 private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
940 int startLine, int cursorStart, Location startLocation) throws TemplateParseException
941 {
942 if (jwcId.equalsIgnoreCase(CONTENT_ID))
943 {
944 processContentTag(tagName, startLine, cursorStart, emptyTag);
945
946 return;
947 }
948
949 boolean isRemoveId = jwcId.equalsIgnoreCase(REMOVE_ID);
950
951 if (_ignoring && !isRemoveId)
952 templateParseProblem(
953 ParseMessages.componentMayNotBeIgnored(tagName, startLine),
954 startLocation,
955 startLine,
956 cursorStart);
957
958 String type = null;
959 boolean allowBody = false;
960
961 if (_patternMatcher.matches(jwcId, _implicitIdPattern))
962 {
963 MatchResult match = _patternMatcher.getMatch();
964
965 jwcId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
966 type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
967
968 String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
969 String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
970
971 // If (and this is typical) no actual component id was specified,
972 // then generate one on the fly.
973 // The allocated id for anonymous components is
974 // based on the simple (unprefixed) type, but starts
975 // with a leading dollar sign to ensure no conflicts
976 // with user defined component ids (which don't allow dollar signs
977 // in the id).
978 // New for 4.0: the component type may included slashes ('/'), but these
979 // are not valid identifiers, so we convert them to '$'.
980
981 if (jwcId == null)
982 jwcId = _idAllocator.allocateId("$" + simpleType.replace('/', '$'));
983
984 try
985 {
986 allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
987 }
988 catch (ApplicationRuntimeException e)
989 {
990 // give subclasses a chance to handle and rethrow
991 templateParseProblem(e, startLine, cursorStart);
992 }
993
994 }
995 else
996 {
997 if (!isRemoveId)
998 {
999 if (!_patternMatcher.matches(jwcId, _simpleIdPattern))
1000 templateParseProblem(
1001 ParseMessages.componentIdInvalid(tagName, startLine, jwcId),
1002 startLocation,
1003 startLine,
1004 cursorStart);
1005
1006 if (!_delegate.getKnownComponent(jwcId))
1007 templateParseProblem(
1008 ParseMessages.unknownComponentId(tagName, startLine, jwcId),
1009 startLocation,
1010 startLine,
1011 cursorStart);
1012
1013 try
1014 {
1015 allowBody = _delegate.getAllowBody(jwcId, startLocation);
1016 }
1017 catch (ApplicationRuntimeException e)
1018 {
1019 // give subclasses a chance to handle and rethrow
1020 templateParseProblem(e, startLine, cursorStart);
1021 }
1022 }
1023 }
1024
1025 // Ignore the body if we're removing the entire tag,
1026 // of if the corresponding component doesn't allow
1027 // a body.
1028
1029 boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
1030
1031 if (_ignoring && ignoreBody)
1032 templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
1033 _resourceLocation, startLine), startLine, cursorStart);
1034
1035 if (!emptyTag)
1036 pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
1037
1038 // End any open block.
1039
1040 addTextToken(cursorStart - 1);
1041
1042 if (!isRemoveId)
1043 {
1044 addOpenToken(tagName, jwcId, type, startLocation);
1045
1046 if (emptyTag)
1047 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1048 }
1049
1050 advance();
1051 }
1052
1053 private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
1054 {
1055 Tag tag = new Tag(tagName, startLine);
1056
1057 tag._component = !isRemoveId;
1058 tag._removeTag = isRemoveId;
1059
1060 tag._ignoringBody = ignoreBody;
1061
1062 _ignoring = tag._ignoringBody;
1063
1064 tag._mustBalance = true;
1065
1066 _stack.add(tag);
1067 }
1068
1069 private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
1070 throws TemplateParseException
1071 {
1072 if (_ignoring)
1073 templateParseProblem(
1074 ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
1075 new LocationImpl(_resourceLocation, startLine),
1076 startLine,
1077 cursorStart);
1078
1079 if (emptyTag)
1080 templateParseProblem(
1081 ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
1082 new LocationImpl(_resourceLocation, startLine),
1083 startLine,
1084 cursorStart);
1085
1086 _tokens.clear();
1087 _blockStart = -1;
1088
1089 Tag tag = new Tag(tagName, startLine);
1090
1091 tag._mustBalance = true;
1092 tag._content = true;
1093
1094 _stack.clear();
1095 _stack.add(tag);
1096
1097 advance();
1098 }
1099
1100 private void addOpenToken(String tagName, String jwcId, String type, Location location)
1101 {
1102 OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
1103 _tokens.add(token);
1104
1105 if (_attributes.isEmpty())
1106 return;
1107
1108 Iterator i = _attributes.entrySet().iterator();
1109 while (i.hasNext())
1110 {
1111 Map.Entry entry = (Map.Entry) i.next();
1112
1113 String key = (String) entry.getKey();
1114
1115 if (key.equalsIgnoreCase(_componentAttributeName))
1116 continue;
1117
1118 String value = (String) entry.getValue();
1119
1120 addAttributeToToken(token, key, value);
1121 }
1122 }
1123
1124 /**
1125 * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
1126 *
1127 * @since 3.0
1128 */
1129
1130 private void addAttributeToToken(OpenToken token, String name, String attributeValue)
1131 {
1132 token.addAttribute(name, convertEntitiesToPlain(attributeValue));
1133 }
1134
1135 /**
1136 * Invoked to handle a closing tag, i.e., &lt;/foo&gt;. When a tag closes, it will match against
1137 * a tag on the open tag stack. Preferably the top tag on the stack (if everything is well
1138 * balanced), but this is HTML, not XML, so many tags won't balance.
1139 * <p>
1140 * Once the matching tag is located, the question is ... is the tag dynamic or static? If
1141 * static, then the current text block is extended to include this close tag. If dynamic, then
1142 * the current text block is ended (before the '<' that starts the tag) and a close token is
1143 * added.
1144 * <p>
1145 * In either case, the matching static element and anything above it is removed, and the cursor
1146 * is left on the character following the '>'.
1147 */
1148
1149 private void closeTag() throws TemplateParseException
1150 {
1151 int cursorStart = _cursor;
1152 int length = _templateData.length;
1153 int startLine = _line;
1154
1155 Location startLocation = getCurrentLocation();
1156
1157 _cursor += CLOSE_TAG.length;
1158
1159 int tagStart = _cursor;
1160
1161 while (true)
1162 {
1163 if (_cursor >= length)
1164 templateParseProblem(
1165 ParseMessages.incompleteCloseTag(startLine),
1166 startLocation,
1167 startLine,
1168 cursorStart);
1169
1170 char ch = _templateData[_cursor];
1171
1172 if (ch == '>')
1173 break;
1174
1175 advance();
1176 }
1177
1178 String tagName = new String(_templateData, tagStart, _cursor - tagStart);
1179
1180 int stackPos = _stack.size() - 1;
1181 Tag tag = null;
1182
1183 while (stackPos >= 0)
1184 {
1185 tag = (Tag) _stack.get(stackPos);
1186
1187 if (tag.match(tagName))
1188 break;
1189
1190 if (tag._mustBalance)
1191 templateParseProblem(ParseMessages.improperlyNestedCloseTag(
1192 tagName,
1193 startLine,
1194 tag._tagName,
1195 tag._line), startLocation, startLine, cursorStart);
1196
1197 stackPos--;
1198 }
1199
1200 if (stackPos < 0)
1201 templateParseProblem(
1202 ParseMessages.unmatchedCloseTag(tagName, startLine),
1203 startLocation,
1204 startLine,
1205 cursorStart);
1206
1207 // Special case for the content tag
1208
1209 if (tag._content)
1210 {
1211 addTextToken(cursorStart - 1);
1212
1213 // Advance the cursor right to the end.
1214
1215 _cursor = length;
1216 _stack.clear();
1217 return;
1218 }
1219
1220 // When a component closes, add a CLOSE tag.
1221 if (tag._component)
1222 {
1223 addTextToken(cursorStart - 1);
1224
1225 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1226 }
1227 else
1228 {
1229 // The close of a static tag. Unless removing the tag
1230 // entirely, make sure the block tag is part of a text block.
1231
1232 if (_blockStart < 0 && !tag._removeTag && !_ignoring)
1233 _blockStart = cursorStart;
1234 }
1235
1236 // Remove all elements at stackPos or above.
1237
1238 for (int i = _stack.size() - 1; i >= stackPos; i--)
1239 _stack.remove(i);
1240
1241 // Advance cursor past '>'
1242
1243 advance();
1244
1245 // If editting out the tag (i.e., $remove$) then kill any whitespace.
1246 // For components that simply don't contain a body, removeTag will
1247 // be false.
1248
1249 if (tag._removeTag)
1250 advanceOverWhitespace();
1251
1252 // If we were ignoring the body of the tag, then clear the ignoring
1253 // flag, since we're out of the body.
1254
1255 if (tag._ignoringBody)
1256 _ignoring = false;
1257 }
1258
1259 /**
1260 * Advances the cursor to the next character. If the end-of-line is reached, then increments the
1261 * line counter.
1262 */
1263
1264 private void advance()
1265 {
1266 int length = _templateData.length;
1267
1268 if (_cursor >= length)
1269 return;
1270
1271 char ch = _templateData[_cursor];
1272
1273 _cursor++;
1274
1275 if (ch == '\n')
1276 {
1277 _line++;
1278 _currentLocation = null;
1279 return;
1280 }
1281
1282 // A \r, or a \r\n also counts as a new line.
1283
1284 if (ch == '\r')
1285 {
1286 _line++;
1287 _currentLocation = null;
1288
1289 if (_cursor < length && _templateData[_cursor] == '\n')
1290 _cursor++;
1291
1292 return;
1293 }
1294
1295 // Not an end-of-line character.
1296
1297 }
1298
1299 private void advanceOverWhitespace()
1300 {
1301 int length = _templateData.length;
1302
1303 while (_cursor < length)
1304 {
1305 char ch = _templateData[_cursor];
1306 if (!Character.isWhitespace(ch))
1307 return;
1308
1309 advance();
1310 }
1311 }
1312
1313 /**
1314 * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
1315 * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
1316 * from the output map. May return null (rather than return an empty Map).
1317 */
1318
1319 private Map filter(Map input, String[] removeKeys)
1320 {
1321 if (input == null || input.isEmpty())
1322 return null;
1323
1324 Map result = null;
1325
1326 Iterator i = input.entrySet().iterator();
1327
1328 nextkey: while (i.hasNext())
1329 {
1330 Map.Entry entry = (Map.Entry) i.next();
1331
1332 String key = (String) entry.getKey();
1333
1334 for (int j = 0; j < removeKeys.length; j++)
1335 {
1336 if (key.equalsIgnoreCase(removeKeys[j]))
1337 continue nextkey;
1338 }
1339
1340 if (result == null)
1341 result = new HashMap(input.size());
1342
1343 result.put(key, entry.getValue());
1344 }
1345
1346 return result;
1347 }
1348
1349 /**
1350 * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
1351 * and values. Returns the value for the first key found that matches (caselessly) the input
1352 * key. Returns null if no value found.
1353 */
1354
1355 protected String findValueCaselessly(String key, Map map)
1356 {
1357 String result = (String) map.get(key);
1358
1359 if (result != null)
1360 return result;
1361
1362 Iterator i = map.entrySet().iterator();
1363 while (i.hasNext())
1364 {
1365 Map.Entry entry = (Map.Entry) i.next();
1366
1367 String entryKey = (String) entry.getKey();
1368
1369 if (entryKey.equalsIgnoreCase(key))
1370 return (String) entry.getValue();
1371 }
1372
1373 return null;
1374 }
1375
1376 /**
1377 * Conversions needed by {@link #convertEntitiesToPlain(String)}
1378 */
1379
1380 private static final String[] CONVERSIONS =
1381 { "<", "<", ">", ">", """, "\"", "&", "&" };
1382
1383 /**
1384 * Provided a raw input string that has been recognized to be an expression, this removes excess
1385 * white space and converts &amp;;, &quot;; &lt;; and &gt;; to their normal
1386 * character values (otherwise its impossible to specify those values in expressions in the
1387 * template).
1388 */
1389
1390 private String convertEntitiesToPlain(String input)
1391 {
1392 int inputLength = input.length();
1393
1394 StringBuffer buffer = new StringBuffer(inputLength);
1395
1396 int cursor = 0;
1397
1398 outer: while (cursor < inputLength)
1399 {
1400 for (int i = 0; i < CONVERSIONS.length; i += 2)
1401 {
1402 String entity = CONVERSIONS[i];
1403 int entityLength = entity.length();
1404 String value = CONVERSIONS[i + 1];
1405
1406 if (cursor + entityLength > inputLength)
1407 continue;
1408
1409 if (input.substring(cursor, cursor + entityLength).equals(entity))
1410 {
1411 buffer.append(value);
1412 cursor += entityLength;
1413 continue outer;
1414 }
1415 }
1416
1417 buffer.append(input.charAt(cursor));
1418 cursor++;
1419 }
1420
1421 return buffer.toString().trim();
1422 }
1423
1424 /**
1425 * Returns true if the map contains the given key (caseless search) and the value is "true"
1426 * (caseless comparison).
1427 */
1428
1429 private boolean checkBoolean(String key, Map map)
1430 {
1431 String value = findValueCaselessly(key, map);
1432
1433 if (value == null)
1434 return false;
1435
1436 return value.equalsIgnoreCase("true");
1437 }
1438
1439 /**
1440 * Gets the current location within the file. This allows the location to be created only as
1441 * needed, and multiple objects on the same line can share the same Location instance.
1442 *
1443 * @since 3.0
1444 */
1445
1446 protected Location getCurrentLocation()
1447 {
1448 if (_currentLocation == null)
1449 _currentLocation = new LocationImpl(_resourceLocation, _line);
1450
1451 return _currentLocation;
1452 }
1453
1454 public void setFactory(TemplateTokenFactory factory)
1455 {
1456 _factory = factory;
1457 }
1458
1459 }