View Javadoc
1   package net.sf.logdistiller.util;
2   
3   /*
4    * @(#)UncompressInputStream.java           0.3-3 06/05/2001
5    *
6    *  This file is part of the HTTPClient package
7    *  Copyright (C) 1996-2001 Ronald Tschal�r
8    *
9    *  This library is free software; you can redistribute it and/or
10   *  modify it under the terms of the GNU Lesser General Public
11   *  License as published by the Free Software Foundation; either
12   *  version 2 of the License, or (at your option) any later version.
13   *
14   *  This library is distributed in the hope that it will be useful,
15   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17   *  Lesser General Public License for more details.
18   *
19   *  You should have received a copy of the GNU Lesser General Public
20   *  License along with this library; if not, write to the Free
21   *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
22   *  MA 02111-1307, USA
23   *
24   *  For questions, suggestions, bug-reports, enhancement-requests etc.
25   *  I may be contacted at:
26   *
27   *  ronald@innovation.ch
28   *
29   *  The HTTPClient's home page is located at:
30   *
31   *  http://www.innovation.ch/java/HTTPClient/
32   *
33   */
34  
35  //package HTTPClient;
36  import java.io.IOException;
37  import java.io.EOFException;
38  import java.io.InputStream;
39  import java.io.FileInputStream;
40  import java.io.FilterInputStream;
41  
42  /**
43   * This class decompresses an input stream containing data compressed with the unix "compress" utility (LZC, a LZW
44   * variant). This code is based heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written by Peter
45   * Jannesen) and the original compress code.
46   *
47   * @version 0.3-3 06/05/2001
48   * @author Ronald Tschal�r
49   */
50  public class UncompressInputStream
51      extends FilterInputStream
52  {
53      /**
54       * @param is the input stream to decompress
55       * @exception IOException if the header is malformed
56       */
57      public UncompressInputStream( InputStream is )
58          throws IOException
59      {
60          super( is );
61          parse_header();
62      }
63  
64      byte[] one = new byte[1];
65  
66      public synchronized int read()
67          throws IOException
68      {
69          int b = in.read( one, 0, 1 );
70          if ( b == 1 )
71              return ( one[0] & 0xff );
72          else
73              return -1;
74      }
75  
76      // string table stuff
77      private static final int TBL_CLEAR = 0x100;
78  
79      private static final int TBL_FIRST = TBL_CLEAR + 1;
80  
81      private int[] tab_prefix;
82  
83      private byte[] tab_suffix;
84  
85      private int[] zeros = new int[256];
86  
87      private byte[] stack;
88  
89      // various state
90      private boolean block_mode;
91  
92      private int n_bits;
93  
94      private int maxbits;
95  
96      private int maxmaxcode;
97  
98      private int maxcode;
99  
100     private int bitmask;
101 
102     private int oldcode;
103 
104     private byte finchar;
105 
106     private int stackp;
107 
108     private int free_ent;
109 
110     // input buffer
111     private byte[] data = new byte[10000];
112 
113     private int bit_pos = 0, end = 0, got = 0;
114 
115     private boolean eof = false;
116 
117     private static final int EXTRA = 64;
118 
119     public synchronized int read( byte[] buf, int off, int len )
120         throws IOException
121     {
122         if ( eof )
123             return -1;
124         int start = off;
125 
126         /*
127          * Using local copies of various variables speeds things up by as much as 30% !
128          */
129         int[] l_tab_prefix = tab_prefix;
130         byte[] l_tab_suffix = tab_suffix;
131         byte[] l_stack = stack;
132         int l_n_bits = n_bits;
133         int l_maxcode = maxcode;
134         int l_maxmaxcode = maxmaxcode;
135         int l_bitmask = bitmask;
136         int l_oldcode = oldcode;
137         byte l_finchar = finchar;
138         int l_stackp = stackp;
139         int l_free_ent = free_ent;
140         byte[] l_data = data;
141         int l_bit_pos = bit_pos;
142 
143         // empty stack if stuff still left
144 
145         int s_size = l_stack.length - l_stackp;
146         if ( s_size > 0 )
147         {
148             int num = ( s_size >= len ) ? len : s_size;
149             System.arraycopy( l_stack, l_stackp, buf, off, num );
150             off += num;
151             len -= num;
152             l_stackp += num;
153         }
154 
155         if ( len == 0 )
156         {
157             stackp = l_stackp;
158             return off - start;
159         }
160 
161         // loop, filling local buffer until enough data has been decompressed
162 
163         main_loop: do
164         {
165             if ( end < EXTRA )
166                 fill();
167 
168             int bit_in = ( got > 0 ) ? ( end - end % l_n_bits ) << 3 : ( end << 3 ) - ( l_n_bits - 1 );
169 
170             while ( l_bit_pos < bit_in )
171             {
172                 // check for code-width expansion
173 
174                 if ( l_free_ent > l_maxcode )
175                 {
176                     int n_bytes = l_n_bits << 3;
177                     l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
178 
179                     l_n_bits++;
180                     l_maxcode = ( l_n_bits == maxbits ) ? l_maxmaxcode : ( 1 << l_n_bits ) - 1;
181 
182                     if ( debug )
183                         System.err.println( "Code-width expanded to " + l_n_bits );
184 
185                     l_bitmask = ( 1 << l_n_bits ) - 1;
186                     l_bit_pos = resetbuf( l_bit_pos );
187                     continue main_loop;
188                 }
189 
190                 // read next code
191 
192                 int pos = l_bit_pos >> 3;
193                 int code =
194                     ( ( ( l_data[pos] & 0xFF ) | ( ( l_data[pos + 1] & 0xFF ) << 8 ) | ( ( l_data[pos + 2] & 0xFF ) << 16 ) ) >> ( l_bit_pos & 0x7 ) )
195                         & l_bitmask;
196                 l_bit_pos += l_n_bits;
197 
198                 // handle first iteration
199 
200                 if ( l_oldcode == -1 )
201                 {
202                     if ( code >= 256 )
203                         throw new IOException( "corrupt input: " + code + " > 255" );
204                     l_finchar = (byte) ( l_oldcode = code );
205                     buf[off++] = l_finchar;
206                     len--;
207                     continue;
208                 }
209 
210                 // handle CLEAR code
211 
212                 if ( code == TBL_CLEAR && block_mode )
213                 {
214                     System.arraycopy( zeros, 0, l_tab_prefix, 0, zeros.length );
215                     l_free_ent = TBL_FIRST - 1;
216 
217                     int n_bytes = l_n_bits << 3;
218                     l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
219                     l_n_bits = INIT_BITS;
220                     l_maxcode = ( 1 << l_n_bits ) - 1;
221                     l_bitmask = l_maxcode;
222 
223                     if ( debug )
224                         System.err.println( "Code tables reset" );
225 
226                     l_bit_pos = resetbuf( l_bit_pos );
227                     continue main_loop;
228                 }
229 
230                 // setup
231 
232                 int incode = code;
233                 l_stackp = l_stack.length;
234 
235                 // Handle KwK case
236 
237                 if ( code >= l_free_ent )
238                 {
239                     if ( code > l_free_ent )
240                         throw new IOException( "corrupt input: code=" + code + ", free_ent=" + l_free_ent );
241 
242                     l_stack[--l_stackp] = l_finchar;
243                     code = l_oldcode;
244                 }
245 
246                 // Generate output characters in reverse order
247 
248                 while ( code >= 256 )
249                 {
250                     l_stack[--l_stackp] = l_tab_suffix[code];
251                     code = l_tab_prefix[code];
252                 }
253                 l_finchar = l_tab_suffix[code];
254                 buf[off++] = l_finchar;
255                 len--;
256 
257                 // And put them out in forward order
258 
259                 s_size = l_stack.length - l_stackp;
260                 int num = ( s_size >= len ) ? len : s_size;
261                 System.arraycopy( l_stack, l_stackp, buf, off, num );
262                 off += num;
263                 len -= num;
264                 l_stackp += num;
265 
266                 // generate new entry in table
267 
268                 if ( l_free_ent < l_maxmaxcode )
269                 {
270                     l_tab_prefix[l_free_ent] = l_oldcode;
271                     l_tab_suffix[l_free_ent] = l_finchar;
272                     l_free_ent++;
273                 }
274 
275                 // Remember previous code
276 
277                 l_oldcode = incode;
278 
279                 // if output buffer full, then return
280 
281                 if ( len == 0 )
282                 {
283                     n_bits = l_n_bits;
284                     maxcode = l_maxcode;
285                     bitmask = l_bitmask;
286                     oldcode = l_oldcode;
287                     finchar = l_finchar;
288                     stackp = l_stackp;
289                     free_ent = l_free_ent;
290                     bit_pos = l_bit_pos;
291 
292                     return off - start;
293                 }
294             }
295 
296             l_bit_pos = resetbuf( l_bit_pos );
297         }
298         while ( got > 0 );
299 
300         n_bits = l_n_bits;
301         maxcode = l_maxcode;
302         bitmask = l_bitmask;
303         oldcode = l_oldcode;
304         finchar = l_finchar;
305         stackp = l_stackp;
306         free_ent = l_free_ent;
307         bit_pos = l_bit_pos;
308 
309         eof = true;
310         return off - start;
311     }
312 
313     /**
314      * Moves the unread data in the buffer to the beginning and resets the pointers.
315      */
316     private final int resetbuf( int bit_pos )
317     {
318         int pos = bit_pos >> 3;
319         System.arraycopy( data, pos, data, 0, end - pos );
320         end -= pos;
321         return 0;
322     }
323 
324     private final void fill()
325         throws IOException
326     {
327         got = in.read( data, end, data.length - 1 - end );
328         if ( got > 0 )
329             end += got;
330     }
331 
332     public synchronized long skip( long num )
333         throws IOException
334     {
335         byte[] tmp = new byte[(int) num];
336         int got = read( tmp, 0, (int) num );
337 
338         if ( got > 0 )
339             return (long) got;
340         else
341             return 0L;
342     }
343 
344     public synchronized int available()
345         throws IOException
346     {
347         if ( eof )
348             return 0;
349 
350         return in.available();
351     }
352 
353     private static final int LZW_MAGIC = 0x1f9d;
354 
355     private static final int MAX_BITS = 16;
356 
357     private static final int INIT_BITS = 9;
358 
359     private static final int HDR_MAXBITS = 0x1f;
360 
361     private static final int HDR_EXTENDED = 0x20;
362 
363     private static final int HDR_FREE = 0x40;
364 
365     private static final int HDR_BLOCK_MODE = 0x80;
366 
367     private void parse_header()
368         throws IOException
369     {
370         // read in and check magic number
371 
372         int t = in.read();
373         if ( t < 0 )
374             throw new EOFException( "Failed to read magic number" );
375         int magic = ( t & 0xff ) << 8;
376         t = in.read();
377         if ( t < 0 )
378             throw new EOFException( "Failed to read magic number" );
379         magic += t & 0xff;
380         if ( magic != LZW_MAGIC )
381             throw new IOException( "Input not in compress format (read " + "magic number 0x"
382                 + Integer.toHexString( magic ) + ")" );
383 
384         // read in header byte
385 
386         int header = in.read();
387         if ( header < 0 )
388             throw new EOFException( "Failed to read header" );
389 
390         block_mode = ( header & HDR_BLOCK_MODE ) > 0;
391         maxbits = header & HDR_MAXBITS;
392 
393         if ( maxbits > MAX_BITS )
394             throw new IOException( "Stream compressed with " + maxbits + " bits, but can only handle " + MAX_BITS
395                 + " bits" );
396 
397         if ( ( header & HDR_EXTENDED ) > 0 )
398             throw new IOException( "Header extension bit set" );
399 
400         if ( ( header & HDR_FREE ) > 0 )
401             throw new IOException( "Header bit 6 set" );
402 
403         if ( debug )
404         {
405             System.err.println( "block mode: " + block_mode );
406             System.err.println( "max bits:   " + maxbits );
407         }
408 
409         // initialize stuff
410 
411         maxmaxcode = 1 << maxbits;
412         n_bits = INIT_BITS;
413         maxcode = ( 1 << n_bits ) - 1;
414         bitmask = maxcode;
415         oldcode = -1;
416         finchar = 0;
417         free_ent = block_mode ? TBL_FIRST : 256;
418 
419         tab_prefix = new int[1 << maxbits];
420         tab_suffix = new byte[1 << maxbits];
421         stack = new byte[1 << maxbits];
422         stackp = stack.length;
423 
424         for ( int idx = 255; idx >= 0; idx-- )
425             tab_suffix[idx] = (byte) idx;
426     }
427 
428     private static final boolean debug = false;
429 
430     public static void main( String args[] )
431         throws Exception
432     {
433         if ( args.length != 1 )
434         {
435             System.err.println( "Usage: UncompressInputStream <file>" );
436             System.exit( 1 );
437         }
438 
439         InputStream in = new UncompressInputStream( new FileInputStream( args[0] ) );
440 
441         byte[] buf = new byte[100000];
442         int tot = 0;
443         long beg = System.currentTimeMillis();
444 
445         while ( true )
446         {
447             int got = in.read( buf );
448             if ( got < 0 )
449                 break;
450             System.out.write( buf, 0, got );
451             tot += got;
452         }
453 
454         long end = System.currentTimeMillis();
455         System.err.println( "Decompressed " + tot + " bytes" );
456         System.err.println( "Time: " + ( end - beg ) / 1000. + " seconds" );
457     }
458 }