001 // Copyright 2004, 2005 The Apache Software Foundation
002 //
003 // Licensed under the Apache License, Version 2.0 (the "License");
004 // you may not use this file except in compliance with the License.
005 // You may obtain a copy of the License at
006 //
007 // http://www.apache.org/licenses/LICENSE-2.0
008 //
009 // Unless required by applicable law or agreed to in writing, software
010 // distributed under the License is distributed on an "AS IS" BASIS,
011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012 // See the License for the specific language governing permissions and
013 // limitations under the License.
014
015 package org.apache.tapestry.util.text;
016
017 import java.io.BufferedReader;
018 import java.io.IOException;
019 import java.io.InputStream;
020 import java.io.InputStreamReader;
021 import java.io.Reader;
022 import java.io.UnsupportedEncodingException;
023 import java.util.Map;
024
025 /**
026 * An object that loads a properties file from the provided input stream or reader.
027 * This class reads the property file exactly like java.util.Properties,
028 * except that it also allows the files to use an encoding other than ISO-8859-1
029 * and all non-ASCII characters are read correctly using the given encoding.
030 * In short, non-latin characters no longer need to be quoted using native2ascii.
031 *
032 * @author mb
033 * @since 4.0
034 */
035 public class LocalizedPropertiesLoader
036 {
037 private static final String HEX_DIGITS = "0123456789ABCDEF";
038
039 private static final ICharacterMatcher WHITESPACE = new WhitespaceMatcher(false);
040 private static final ICharacterMatcher LINE_SEPARATOR = new AsciiCharacterMatcher("\n\r");
041 private static final ICharacterMatcher NOT_LINE_SEPARATOR = new InverseMatcher(LINE_SEPARATOR);
042 private static final ICharacterMatcher KEY_VALUE_SEPARATOR = new AsciiCharacterMatcher("=:");
043 private static final ICharacterMatcher SEPARATOR = new AsciiCharacterMatcher("=:\r\n");
044 private static final ICharacterMatcher COMMENT = new AsciiCharacterMatcher("#!");
045 private static final ICharacterMatcher WHITESPACE_OR_SEPARATOR =
046 new CompoundMatcher(new ICharacterMatcher[] { WHITESPACE, SEPARATOR });
047
048 private ExtendedReader _extendedReader;
049
050 /**
051 * Creates a new loader that will load the properties from the given input stream
052 * using the default character encoding
053 *
054 * @param ins the input stream to load the properties from
055 */
056 public LocalizedPropertiesLoader(InputStream ins)
057 {
058 this(new InputStreamReader(ins));
059 }
060
061 /**
062 * Creates a new loader that will load the properties from the given input stream
063 * using the provided character encoding
064 *
065 * @param ins the input stream to load the properties from
066 * @param encoding the character encoding the be used when reading from the stream
067 * @throws UnsupportedEncodingException if the name of the encoding cannot be recognized
068 */
069 public LocalizedPropertiesLoader(InputStream ins, String encoding) throws UnsupportedEncodingException
070 {
071 this(new InputStreamReader(ins, encoding));
072 }
073
074 /**
075 * Creates a new loader that will load the properties from the given reader
076 *
077 * @param reader the Reader to load the properties from
078 */
079 public LocalizedPropertiesLoader(Reader reader)
080 {
081 _extendedReader = new ExtendedReader(new BufferedReader(reader));
082 }
083
084 /**
085 * Read the properties from the provided stream and store them into the given map
086 *
087 * @param properties the map where the properties will be stored
088 * @throws IOException if an error occurs
089 */
090 public void load(Map properties) throws IOException
091 {
092 while (!isAtEndOfStream()) {
093 // we are at the beginning of a line.
094 // check whether it is a comment and if it is, skip it
095 int nextChar = _extendedReader.peek();
096 if (COMMENT.matches((char) nextChar)) {
097 _extendedReader.skipCharacters(NOT_LINE_SEPARATOR);
098 continue;
099 }
100
101 _extendedReader.skipCharacters(WHITESPACE);
102 if (!isAtEndOfLine()) {
103 // this line does not consist only of whitespace. the next word is the key
104 String key = readQuotedLine(WHITESPACE_OR_SEPARATOR);
105 _extendedReader.skipCharacters(WHITESPACE);
106
107 // if the next char is a key-value separator, read it and skip the following spaces
108 nextChar = _extendedReader.peek();
109 if (nextChar > 0 && KEY_VALUE_SEPARATOR.matches((char) nextChar)) {
110 _extendedReader.read();
111 _extendedReader.skipCharacters(WHITESPACE);
112 }
113
114 // finally, read the value
115 String value = readQuotedLine(LINE_SEPARATOR);
116
117 properties.put(key, value);
118 }
119 _extendedReader.skipCharacters(LINE_SEPARATOR);
120 }
121 }
122
123
124 private boolean isAtEndOfStream() throws IOException
125 {
126 int nextChar = _extendedReader.peek();
127 return (nextChar < 0);
128 }
129
130
131 private boolean isAtEndOfLine() throws IOException
132 {
133 int nextChar = _extendedReader.peek();
134 if (nextChar < 0)
135 return true;
136 return LINE_SEPARATOR.matches((char) nextChar);
137 }
138
139
140 private String readQuotedLine(ICharacterMatcher terminators) throws IOException
141 {
142 StringBuffer buf = new StringBuffer();
143
144 while (true) {
145 // see what the next char is
146 int nextChar = _extendedReader.peek();
147
148 // if at end of stream or the char is one of the terminators, stop
149 if (nextChar < 0 || terminators.matches((char) nextChar))
150 break;
151
152 try {
153 // read the char (and possibly unquote it)
154 char ch = readQuotedChar();
155 buf.append(ch);
156 } catch (IgnoreCharacterException e) {
157 // simply ignore -- no character was read
158 }
159 }
160
161 return buf.toString();
162 }
163
164
165 private char readQuotedChar() throws IOException, IgnoreCharacterException
166 {
167 int nextChar = _extendedReader.read();
168 if (nextChar < 0)
169 throw new IgnoreCharacterException();
170 char ch = (char) nextChar;
171
172 // if the char is not the quotation char, simply return it
173 if (ch != '\\')
174 return ch;
175
176 // the character is a quotation character. unquote it
177 nextChar = _extendedReader.read();
178
179 // if at the end of the stream, stop
180 if (nextChar < 0)
181 throw new IgnoreCharacterException();
182
183 ch = (char) nextChar;
184 switch (ch) {
185 case 'u' :
186 char res = 0;
187 for (int i = 0; i < 4; i++) {
188 nextChar = _extendedReader.read();
189 if (nextChar < 0)
190 throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
191 char digitChar = (char) nextChar;
192 int digit = HEX_DIGITS.indexOf(Character.toUpperCase(digitChar));
193 if (digit < 0)
194 throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
195 res = (char) (res * 16 + digit);
196 }
197 return res;
198
199 case '\r' :
200 // if the next char is \n, read it and fall through
201 nextChar = _extendedReader.peek();
202 if (nextChar == '\n')
203 _extendedReader.read();
204 case '\n' :
205 _extendedReader.skipCharacters(WHITESPACE);
206 throw new IgnoreCharacterException();
207
208 case 't' : return '\t';
209 case 'n' : return '\n';
210 case 'r' : return '\r';
211 default: return ch;
212 }
213 }
214
215
216 private static class IgnoreCharacterException extends Exception
217 {
218 private static final long serialVersionUID = 8366308710256427596L;
219 }
220 }