001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.util.text;
016
017 /**
018 * An object that encodes a character according to rules of the HTML specification,
019 * so that it will be properly parsed by a browser irrespectively of the character
020 * encoding used in the HTML output.
021 *
022 * @author mb
023 * @since 4.0
024 */
025 public class MarkupCharacterTranslator implements ICharacterTranslator
026 {
027 private static final String SAFE_CHARACTERS =
028 "01234567890"
029 + "abcdefghijklmnopqrstuvwxyz"
030 + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
031 + "\t\n\r !#$%'()*+,-./:;=?@[\\]^_`{|}~";
032
033 private static final String[][] ENTITIES = {
034 { "\"", """ },
035 { "<", "<" },
036 { ">", ">" },
037 { "&", "&" }
038 };
039
040 private static final ICharacterMatcher SAFE_MATCHER = new AsciiCharacterMatcher(SAFE_CHARACTERS);
041 private static final ICharacterTranslator ENTITY_TRANSLATOR = new AsciiCharacterTranslator(ENTITIES);
042
043 private boolean _encodeNonAscii;
044 private ICharacterMatcher _safeMatcher;
045 private ICharacterTranslator _entityTranslator;
046
047 public MarkupCharacterTranslator()
048 {
049 this(true);
050 }
051
052 public MarkupCharacterTranslator(boolean encodeNonAscii)
053 {
054 this(encodeNonAscii, SAFE_MATCHER, ENTITY_TRANSLATOR);
055 }
056
057 public MarkupCharacterTranslator(boolean encodeNonAscii, ICharacterMatcher safeMatcher, ICharacterTranslator entityTranslator)
058 {
059 _encodeNonAscii = encodeNonAscii;
060 _safeMatcher = safeMatcher;
061 _entityTranslator = entityTranslator;
062 }
063
064 public MarkupCharacterTranslator(boolean encodeNonAscii, String safeCharacters, String[][] entities)
065 {
066 _encodeNonAscii = encodeNonAscii;
067 _safeMatcher = new AsciiCharacterMatcher(safeCharacters);
068 _entityTranslator = new AsciiCharacterTranslator(entities);
069 }
070
071 /**
072 * @see org.apache.tapestry.util.text.IMarkupCharacterTranslator#translateAttribute(char)
073 */
074 public String translate(char ch) {
075 // IE and Firefox do not handle characters between 128 and 159 well,
076 // so they have to be quoted as well
077 if (ch >= 160 && !_encodeNonAscii)
078 return null;
079
080 if (_safeMatcher.matches(ch))
081 return null;
082
083 String entity = _entityTranslator.translate(ch);
084 if (entity != null)
085 return entity;
086
087 // needs to use a NumberFormat here to be fully compliant,
088 // but this is accepted fine by the browsers
089 return "&#" + (int) ch + ";";
090 }
091 }