001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019 package org.apache.commons.compress.archivers.zip;
020
021 import java.io.EOFException;
022 import java.io.IOException;
023 import java.io.InputStream;
024 import java.io.PushbackInputStream;
025 import java.util.zip.CRC32;
026 import java.util.zip.DataFormatException;
027 import java.util.zip.Inflater;
028 import java.util.zip.ZipException;
029
030 import org.apache.commons.compress.archivers.ArchiveEntry;
031 import org.apache.commons.compress.archivers.ArchiveInputStream;
032
033 /**
034 * Implements an input stream that can read Zip archives.
035 * <p>
036 * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE algorithm is used, as the size information
037 * is not available from the header.
038 * <p>
039 * The {@link ZipFile} class is preferred when reading from files.
040 *
041 * @see ZipFile
042 * @NotThreadSafe
043 */
044 public class ZipArchiveInputStream extends ArchiveInputStream {
045
046 private static final int SHORT = 2;
047 private static final int WORD = 4;
048
049 /**
050 * The zip encoding to use for filenames and the file comment.
051 */
052 private final ZipEncoding zipEncoding;
053
054 /**
055 * Whether to look for and use Unicode extra fields.
056 */
057 private final boolean useUnicodeExtraFields;
058
059 private final InputStream in;
060
061 private final Inflater inf = new Inflater(true);
062 private final CRC32 crc = new CRC32();
063
064 private final byte[] buf = new byte[ZipArchiveOutputStream.BUFFER_SIZE];
065
066 private ZipArchiveEntry current = null;
067 private boolean closed = false;
068 private boolean hitCentralDirectory = false;
069 private int readBytesOfEntry = 0, offsetInBuffer = 0;
070 private int bytesReadFromStream = 0;
071 private int lengthOfLastRead = 0;
072 private boolean hasDataDescriptor = false;
073
074 private static final int LFH_LEN = 30;
075 /*
076 local file header signature 4 bytes (0x04034b50)
077 version needed to extract 2 bytes
078 general purpose bit flag 2 bytes
079 compression method 2 bytes
080 last mod file time 2 bytes
081 last mod file date 2 bytes
082 crc-32 4 bytes
083 compressed size 4 bytes
084 uncompressed size 4 bytes
085 file name length 2 bytes
086 extra field length 2 bytes
087 */
088
089 public ZipArchiveInputStream(InputStream inputStream) {
090 this(inputStream, ZipEncodingHelper.UTF8, true);
091 }
092
093 /**
094 * @param encoding the encoding to use for file names, use null
095 * for the platform's default encoding
096 * @param useUnicodeExtraFields whether to use InfoZIP Unicode
097 * Extra Fields (if present) to set the file names.
098 */
099 public ZipArchiveInputStream(InputStream inputStream,
100 String encoding,
101 boolean useUnicodeExtraFields) {
102 zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
103 this.useUnicodeExtraFields = useUnicodeExtraFields;
104 in = new PushbackInputStream(inputStream, buf.length);
105 }
106
107 public ZipArchiveEntry getNextZipEntry() throws IOException {
108 if (closed || hitCentralDirectory) {
109 return null;
110 }
111 if (current != null) {
112 closeEntry();
113 }
114 byte[] lfh = new byte[LFH_LEN];
115 try {
116 readFully(lfh);
117 } catch (EOFException e) {
118 return null;
119 }
120 ZipLong sig = new ZipLong(lfh);
121 if (sig.equals(ZipLong.CFH_SIG)) {
122 hitCentralDirectory = true;
123 return null;
124 }
125 if (!sig.equals(ZipLong.LFH_SIG)) {
126 return null;
127 }
128
129 int off = WORD;
130 current = new ZipArchiveEntry();
131
132 int versionMadeBy = ZipShort.getValue(lfh, off);
133 off += SHORT;
134 current.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT)
135 & ZipFile.NIBLET_MASK);
136
137 final int generalPurposeFlag = ZipShort.getValue(lfh, off);
138 final boolean hasEFS =
139 (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
140 final ZipEncoding entryEncoding =
141 hasEFS ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
142 hasDataDescriptor = (generalPurposeFlag & 8) != 0;
143
144 off += SHORT;
145
146 current.setMethod(ZipShort.getValue(lfh, off));
147 off += SHORT;
148
149 long time = ZipUtil.dosToJavaTime(ZipLong.getValue(lfh, off));
150 current.setTime(time);
151 off += WORD;
152
153 if (!hasDataDescriptor) {
154 current.setCrc(ZipLong.getValue(lfh, off));
155 off += WORD;
156
157 current.setCompressedSize(ZipLong.getValue(lfh, off));
158 off += WORD;
159
160 current.setSize(ZipLong.getValue(lfh, off));
161 off += WORD;
162 } else {
163 off += 3 * WORD;
164 }
165
166 int fileNameLen = ZipShort.getValue(lfh, off);
167
168 off += SHORT;
169
170 int extraLen = ZipShort.getValue(lfh, off);
171 off += SHORT;
172
173 byte[] fileName = new byte[fileNameLen];
174 readFully(fileName);
175 current.setName(entryEncoding.decode(fileName));
176
177 byte[] extraData = new byte[extraLen];
178 readFully(extraData);
179 current.setExtra(extraData);
180
181 if (!hasEFS && useUnicodeExtraFields) {
182 ZipUtil.setNameAndCommentFromExtraFields(current, fileName, null);
183 }
184 return current;
185 }
186
187 public ArchiveEntry getNextEntry() throws IOException {
188 return getNextZipEntry();
189 }
190
191 public int read(byte[] buffer, int start, int length) throws IOException {
192 if (closed) {
193 throw new IOException("The stream is closed");
194 }
195 if (inf.finished() || current == null) {
196 return -1;
197 }
198
199 // avoid int overflow, check null buffer
200 if (start <= buffer.length && length >= 0 && start >= 0
201 && buffer.length - start >= length) {
202 if (current.getMethod() == ZipArchiveOutputStream.STORED) {
203 int csize = (int) current.getSize();
204 if (readBytesOfEntry >= csize) {
205 return -1;
206 }
207 if (offsetInBuffer >= lengthOfLastRead) {
208 offsetInBuffer = 0;
209 if ((lengthOfLastRead = in.read(buf)) == -1) {
210 return -1;
211 }
212 count(lengthOfLastRead);
213 bytesReadFromStream += lengthOfLastRead;
214 }
215 int toRead = length > lengthOfLastRead
216 ? lengthOfLastRead - offsetInBuffer
217 : length;
218 if ((csize - readBytesOfEntry) < toRead) {
219 toRead = csize - readBytesOfEntry;
220 }
221 System.arraycopy(buf, offsetInBuffer, buffer, start, toRead);
222 offsetInBuffer += toRead;
223 readBytesOfEntry += toRead;
224 crc.update(buffer, start, toRead);
225 return toRead;
226 }
227 if (inf.needsInput()) {
228 fill();
229 if (lengthOfLastRead > 0) {
230 bytesReadFromStream += lengthOfLastRead;
231 }
232 }
233 int read = 0;
234 try {
235 read = inf.inflate(buffer, start, length);
236 } catch (DataFormatException e) {
237 throw new ZipException(e.getMessage());
238 }
239 if (read == 0) {
240 if (inf.finished()) {
241 return -1;
242 } else if (lengthOfLastRead == -1) {
243 throw new IOException("Truncated ZIP file");
244 }
245 }
246 crc.update(buffer, start, read);
247 return read;
248 }
249 throw new ArrayIndexOutOfBoundsException();
250 }
251
252 public void close() throws IOException {
253 if (!closed) {
254 closed = true;
255 in.close();
256 }
257 }
258
259 public long skip(long value) throws IOException {
260 if (value >= 0) {
261 long skipped = 0;
262 byte[] b = new byte[1024];
263 while (skipped != value) {
264 long rem = value - skipped;
265 int x = read(b, 0, (int) (b.length > rem ? rem : b.length));
266 if (x == -1) {
267 return skipped;
268 }
269 skipped += x;
270 }
271 return skipped;
272 }
273 throw new IllegalArgumentException();
274 }
275
276 /*
277 * This test assumes that the zip file does not have any additional leading content,
278 * which is something that is allowed by the specification (e.g. self-extracting zips)
279 */
280 public static boolean matches(byte[] signature, int length) {
281 if (length < ZipArchiveOutputStream.LFH_SIG.length) {
282 return false;
283 }
284
285 return checksig(signature, ZipArchiveOutputStream.LFH_SIG) // normal file
286 || checksig(signature, ZipArchiveOutputStream.EOCD_SIG); // empty zip
287 }
288
289 private static boolean checksig(byte[] signature, byte[] expected){
290 for (int i = 0; i < expected.length; i++) {
291 if (signature[i] != expected[i]) {
292 return false;
293 }
294 }
295 return true;
296 }
297
298 private void closeEntry() throws IOException {
299 if (closed) {
300 throw new IOException("The stream is closed");
301 }
302 if (current == null) {
303 return;
304 }
305 // Ensure all entry bytes are read
306 skip(Long.MAX_VALUE);
307 int inB;
308 if (current.getMethod() == ZipArchiveOutputStream.DEFLATED) {
309 inB = inf.getTotalIn();
310 } else {
311 inB = readBytesOfEntry;
312 }
313 int diff = 0;
314
315 // Pushback any required bytes
316 if ((diff = bytesReadFromStream - inB) != 0) {
317 ((PushbackInputStream) in).unread(buf,
318 lengthOfLastRead - diff, diff);
319 }
320
321 if (hasDataDescriptor) {
322 readFully(new byte[4 * WORD]);
323 }
324
325 inf.reset();
326 readBytesOfEntry = offsetInBuffer = bytesReadFromStream =
327 lengthOfLastRead = 0;
328 crc.reset();
329 current = null;
330 }
331
332 private void fill() throws IOException {
333 if (closed) {
334 throw new IOException("The stream is closed");
335 }
336 if ((lengthOfLastRead = in.read(buf)) > 0) {
337 inf.setInput(buf, 0, lengthOfLastRead);
338 }
339 }
340
341 private void readFully(byte[] b) throws IOException {
342 int count = 0, x = 0;
343 while (count != b.length) {
344 count += x = in.read(b, count, b.length - count);
345 if (x == -1) {
346 throw new EOFException();
347 }
348 }
349 }
350 }