001 package net.sf.logdistiller.util;
002
003 /*
004 * @(#)UncompressInputStream.java 0.3-3 06/05/2001
005 *
006 * This file is part of the HTTPClient package
007 * Copyright (C) 1996-2001 Ronald Tschal�r
008 *
009 * This library is free software; you can redistribute it and/or
010 * modify it under the terms of the GNU Lesser General Public
011 * License as published by the Free Software Foundation; either
012 * version 2 of the License, or (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful,
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
017 * Lesser General Public License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free
021 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
022 * MA 02111-1307, USA
023 *
024 * For questions, suggestions, bug-reports, enhancement-requests etc.
025 * I may be contacted at:
026 *
027 * ronald@innovation.ch
028 *
029 * The HTTPClient's home page is located at:
030 *
031 * http://www.innovation.ch/java/HTTPClient/
032 *
033 */
034
035 //package HTTPClient;
036 import java.io.IOException;
037 import java.io.EOFException;
038 import java.io.InputStream;
039 import java.io.FileInputStream;
040 import java.io.FilterInputStream;
041
042 /**
043 * This class decompresses an input stream containing data compressed with the unix "compress" utility (LZC, a LZW
044 * variant). This code is based heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written by Peter
045 * Jannesen) and the original compress code.
046 *
047 * @version 0.3-3 06/05/2001
048 * @author Ronald Tschal�r
049 */
050 public class UncompressInputStream
051 extends FilterInputStream
052 {
053 /**
054 * @param is the input stream to decompress
055 * @exception IOException if the header is malformed
056 */
057 public UncompressInputStream( InputStream is )
058 throws IOException
059 {
060 super( is );
061 parse_header();
062 }
063
064 byte[] one = new byte[1];
065
066 public synchronized int read()
067 throws IOException
068 {
069 int b = in.read( one, 0, 1 );
070 if ( b == 1 )
071 return ( one[0] & 0xff );
072 else
073 return -1;
074 }
075
076 // string table stuff
077 private static final int TBL_CLEAR = 0x100;
078
079 private static final int TBL_FIRST = TBL_CLEAR + 1;
080
081 private int[] tab_prefix;
082
083 private byte[] tab_suffix;
084
085 private int[] zeros = new int[256];
086
087 private byte[] stack;
088
089 // various state
090 private boolean block_mode;
091
092 private int n_bits;
093
094 private int maxbits;
095
096 private int maxmaxcode;
097
098 private int maxcode;
099
100 private int bitmask;
101
102 private int oldcode;
103
104 private byte finchar;
105
106 private int stackp;
107
108 private int free_ent;
109
110 // input buffer
111 private byte[] data = new byte[10000];
112
113 private int bit_pos = 0, end = 0, got = 0;
114
115 private boolean eof = false;
116
117 private static final int EXTRA = 64;
118
119 public synchronized int read( byte[] buf, int off, int len )
120 throws IOException
121 {
122 if ( eof )
123 return -1;
124 int start = off;
125
126 /*
127 * Using local copies of various variables speeds things up by as much as 30% !
128 */
129 int[] l_tab_prefix = tab_prefix;
130 byte[] l_tab_suffix = tab_suffix;
131 byte[] l_stack = stack;
132 int l_n_bits = n_bits;
133 int l_maxcode = maxcode;
134 int l_maxmaxcode = maxmaxcode;
135 int l_bitmask = bitmask;
136 int l_oldcode = oldcode;
137 byte l_finchar = finchar;
138 int l_stackp = stackp;
139 int l_free_ent = free_ent;
140 byte[] l_data = data;
141 int l_bit_pos = bit_pos;
142
143 // empty stack if stuff still left
144
145 int s_size = l_stack.length - l_stackp;
146 if ( s_size > 0 )
147 {
148 int num = ( s_size >= len ) ? len : s_size;
149 System.arraycopy( l_stack, l_stackp, buf, off, num );
150 off += num;
151 len -= num;
152 l_stackp += num;
153 }
154
155 if ( len == 0 )
156 {
157 stackp = l_stackp;
158 return off - start;
159 }
160
161 // loop, filling local buffer until enough data has been decompressed
162
163 main_loop: do
164 {
165 if ( end < EXTRA )
166 fill();
167
168 int bit_in = ( got > 0 ) ? ( end - end % l_n_bits ) << 3 : ( end << 3 ) - ( l_n_bits - 1 );
169
170 while ( l_bit_pos < bit_in )
171 {
172 // check for code-width expansion
173
174 if ( l_free_ent > l_maxcode )
175 {
176 int n_bytes = l_n_bits << 3;
177 l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
178
179 l_n_bits++;
180 l_maxcode = ( l_n_bits == maxbits ) ? l_maxmaxcode : ( 1 << l_n_bits ) - 1;
181
182 if ( debug )
183 System.err.println( "Code-width expanded to " + l_n_bits );
184
185 l_bitmask = ( 1 << l_n_bits ) - 1;
186 l_bit_pos = resetbuf( l_bit_pos );
187 continue main_loop;
188 }
189
190 // read next code
191
192 int pos = l_bit_pos >> 3;
193 int code =
194 ( ( ( l_data[pos] & 0xFF ) | ( ( l_data[pos + 1] & 0xFF ) << 8 ) | ( ( l_data[pos + 2] & 0xFF ) << 16 ) ) >> ( l_bit_pos & 0x7 ) )
195 & l_bitmask;
196 l_bit_pos += l_n_bits;
197
198 // handle first iteration
199
200 if ( l_oldcode == -1 )
201 {
202 if ( code >= 256 )
203 throw new IOException( "corrupt input: " + code + " > 255" );
204 l_finchar = (byte) ( l_oldcode = code );
205 buf[off++] = l_finchar;
206 len--;
207 continue;
208 }
209
210 // handle CLEAR code
211
212 if ( code == TBL_CLEAR && block_mode )
213 {
214 System.arraycopy( zeros, 0, l_tab_prefix, 0, zeros.length );
215 l_free_ent = TBL_FIRST - 1;
216
217 int n_bytes = l_n_bits << 3;
218 l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
219 l_n_bits = INIT_BITS;
220 l_maxcode = ( 1 << l_n_bits ) - 1;
221 l_bitmask = l_maxcode;
222
223 if ( debug )
224 System.err.println( "Code tables reset" );
225
226 l_bit_pos = resetbuf( l_bit_pos );
227 continue main_loop;
228 }
229
230 // setup
231
232 int incode = code;
233 l_stackp = l_stack.length;
234
235 // Handle KwK case
236
237 if ( code >= l_free_ent )
238 {
239 if ( code > l_free_ent )
240 throw new IOException( "corrupt input: code=" + code + ", free_ent=" + l_free_ent );
241
242 l_stack[--l_stackp] = l_finchar;
243 code = l_oldcode;
244 }
245
246 // Generate output characters in reverse order
247
248 while ( code >= 256 )
249 {
250 l_stack[--l_stackp] = l_tab_suffix[code];
251 code = l_tab_prefix[code];
252 }
253 l_finchar = l_tab_suffix[code];
254 buf[off++] = l_finchar;
255 len--;
256
257 // And put them out in forward order
258
259 s_size = l_stack.length - l_stackp;
260 int num = ( s_size >= len ) ? len : s_size;
261 System.arraycopy( l_stack, l_stackp, buf, off, num );
262 off += num;
263 len -= num;
264 l_stackp += num;
265
266 // generate new entry in table
267
268 if ( l_free_ent < l_maxmaxcode )
269 {
270 l_tab_prefix[l_free_ent] = l_oldcode;
271 l_tab_suffix[l_free_ent] = l_finchar;
272 l_free_ent++;
273 }
274
275 // Remember previous code
276
277 l_oldcode = incode;
278
279 // if output buffer full, then return
280
281 if ( len == 0 )
282 {
283 n_bits = l_n_bits;
284 maxcode = l_maxcode;
285 bitmask = l_bitmask;
286 oldcode = l_oldcode;
287 finchar = l_finchar;
288 stackp = l_stackp;
289 free_ent = l_free_ent;
290 bit_pos = l_bit_pos;
291
292 return off - start;
293 }
294 }
295
296 l_bit_pos = resetbuf( l_bit_pos );
297 }
298 while ( got > 0 );
299
300 n_bits = l_n_bits;
301 maxcode = l_maxcode;
302 bitmask = l_bitmask;
303 oldcode = l_oldcode;
304 finchar = l_finchar;
305 stackp = l_stackp;
306 free_ent = l_free_ent;
307 bit_pos = l_bit_pos;
308
309 eof = true;
310 return off - start;
311 }
312
313 /**
314 * Moves the unread data in the buffer to the beginning and resets the pointers.
315 */
316 private final int resetbuf( int bit_pos )
317 {
318 int pos = bit_pos >> 3;
319 System.arraycopy( data, pos, data, 0, end - pos );
320 end -= pos;
321 return 0;
322 }
323
324 private final void fill()
325 throws IOException
326 {
327 got = in.read( data, end, data.length - 1 - end );
328 if ( got > 0 )
329 end += got;
330 }
331
332 public synchronized long skip( long num )
333 throws IOException
334 {
335 byte[] tmp = new byte[(int) num];
336 int got = read( tmp, 0, (int) num );
337
338 if ( got > 0 )
339 return (long) got;
340 else
341 return 0L;
342 }
343
344 public synchronized int available()
345 throws IOException
346 {
347 if ( eof )
348 return 0;
349
350 return in.available();
351 }
352
353 private static final int LZW_MAGIC = 0x1f9d;
354
355 private static final int MAX_BITS = 16;
356
357 private static final int INIT_BITS = 9;
358
359 private static final int HDR_MAXBITS = 0x1f;
360
361 private static final int HDR_EXTENDED = 0x20;
362
363 private static final int HDR_FREE = 0x40;
364
365 private static final int HDR_BLOCK_MODE = 0x80;
366
367 private void parse_header()
368 throws IOException
369 {
370 // read in and check magic number
371
372 int t = in.read();
373 if ( t < 0 )
374 throw new EOFException( "Failed to read magic number" );
375 int magic = ( t & 0xff ) << 8;
376 t = in.read();
377 if ( t < 0 )
378 throw new EOFException( "Failed to read magic number" );
379 magic += t & 0xff;
380 if ( magic != LZW_MAGIC )
381 throw new IOException( "Input not in compress format (read " + "magic number 0x"
382 + Integer.toHexString( magic ) + ")" );
383
384 // read in header byte
385
386 int header = in.read();
387 if ( header < 0 )
388 throw new EOFException( "Failed to read header" );
389
390 block_mode = ( header & HDR_BLOCK_MODE ) > 0;
391 maxbits = header & HDR_MAXBITS;
392
393 if ( maxbits > MAX_BITS )
394 throw new IOException( "Stream compressed with " + maxbits + " bits, but can only handle " + MAX_BITS
395 + " bits" );
396
397 if ( ( header & HDR_EXTENDED ) > 0 )
398 throw new IOException( "Header extension bit set" );
399
400 if ( ( header & HDR_FREE ) > 0 )
401 throw new IOException( "Header bit 6 set" );
402
403 if ( debug )
404 {
405 System.err.println( "block mode: " + block_mode );
406 System.err.println( "max bits: " + maxbits );
407 }
408
409 // initialize stuff
410
411 maxmaxcode = 1 << maxbits;
412 n_bits = INIT_BITS;
413 maxcode = ( 1 << n_bits ) - 1;
414 bitmask = maxcode;
415 oldcode = -1;
416 finchar = 0;
417 free_ent = block_mode ? TBL_FIRST : 256;
418
419 tab_prefix = new int[1 << maxbits];
420 tab_suffix = new byte[1 << maxbits];
421 stack = new byte[1 << maxbits];
422 stackp = stack.length;
423
424 for ( int idx = 255; idx >= 0; idx-- )
425 tab_suffix[idx] = (byte) idx;
426 }
427
428 private static final boolean debug = false;
429
430 public static void main( String args[] )
431 throws Exception
432 {
433 if ( args.length != 1 )
434 {
435 System.err.println( "Usage: UncompressInputStream <file>" );
436 System.exit( 1 );
437 }
438
439 InputStream in = new UncompressInputStream( new FileInputStream( args[0] ) );
440
441 byte[] buf = new byte[100000];
442 int tot = 0;
443 long beg = System.currentTimeMillis();
444
445 while ( true )
446 {
447 int got = in.read( buf );
448 if ( got < 0 )
449 break;
450 System.out.write( buf, 0, got );
451 tot += got;
452 }
453
454 long end = System.currentTimeMillis();
455 System.err.println( "Decompressed " + tot + " bytes" );
456 System.err.println( "Time: " + ( end - beg ) / 1000. + " seconds" );
457 }
458 }