001    package net.sf.logdistiller.util;
002    
003    /*
004     * @(#)UncompressInputStream.java           0.3-3 06/05/2001
005     *
006     *  This file is part of the HTTPClient package
007     *  Copyright (C) 1996-2001 Ronald Tschal�r
008     *
009     *  This library is free software; you can redistribute it and/or
010     *  modify it under the terms of the GNU Lesser General Public
011     *  License as published by the Free Software Foundation; either
012     *  version 2 of the License, or (at your option) any later version.
013     *
014     *  This library is distributed in the hope that it will be useful,
015     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
016     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
017     *  Lesser General Public License for more details.
018     *
019     *  You should have received a copy of the GNU Lesser General Public
020     *  License along with this library; if not, write to the Free
021     *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
022     *  MA 02111-1307, USA
023     *
024     *  For questions, suggestions, bug-reports, enhancement-requests etc.
025     *  I may be contacted at:
026     *
027     *  ronald@innovation.ch
028     *
029     *  The HTTPClient's home page is located at:
030     *
031     *  http://www.innovation.ch/java/HTTPClient/
032     *
033     */
034    
035    //package HTTPClient;
036    import java.io.IOException;
037    import java.io.EOFException;
038    import java.io.InputStream;
039    import java.io.FileInputStream;
040    import java.io.FilterInputStream;
041    
042    /**
043     * This class decompresses an input stream containing data compressed with the unix "compress" utility (LZC, a LZW
044     * variant). This code is based heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written by Peter
045     * Jannesen) and the original compress code.
046     *
047     * @version 0.3-3 06/05/2001
048     * @author Ronald Tschal�r
049     */
050    public class UncompressInputStream
051        extends FilterInputStream
052    {
053        /**
054         * @param is the input stream to decompress
055         * @exception IOException if the header is malformed
056         */
057        public UncompressInputStream( InputStream is )
058            throws IOException
059        {
060            super( is );
061            parse_header();
062        }
063    
064        byte[] one = new byte[1];
065    
066        public synchronized int read()
067            throws IOException
068        {
069            int b = in.read( one, 0, 1 );
070            if ( b == 1 )
071                return ( one[0] & 0xff );
072            else
073                return -1;
074        }
075    
076        // string table stuff
077        private static final int TBL_CLEAR = 0x100;
078    
079        private static final int TBL_FIRST = TBL_CLEAR + 1;
080    
081        private int[] tab_prefix;
082    
083        private byte[] tab_suffix;
084    
085        private int[] zeros = new int[256];
086    
087        private byte[] stack;
088    
089        // various state
090        private boolean block_mode;
091    
092        private int n_bits;
093    
094        private int maxbits;
095    
096        private int maxmaxcode;
097    
098        private int maxcode;
099    
100        private int bitmask;
101    
102        private int oldcode;
103    
104        private byte finchar;
105    
106        private int stackp;
107    
108        private int free_ent;
109    
110        // input buffer
111        private byte[] data = new byte[10000];
112    
113        private int bit_pos = 0, end = 0, got = 0;
114    
115        private boolean eof = false;
116    
117        private static final int EXTRA = 64;
118    
119        public synchronized int read( byte[] buf, int off, int len )
120            throws IOException
121        {
122            if ( eof )
123                return -1;
124            int start = off;
125    
126            /*
127             * Using local copies of various variables speeds things up by as much as 30% !
128             */
129            int[] l_tab_prefix = tab_prefix;
130            byte[] l_tab_suffix = tab_suffix;
131            byte[] l_stack = stack;
132            int l_n_bits = n_bits;
133            int l_maxcode = maxcode;
134            int l_maxmaxcode = maxmaxcode;
135            int l_bitmask = bitmask;
136            int l_oldcode = oldcode;
137            byte l_finchar = finchar;
138            int l_stackp = stackp;
139            int l_free_ent = free_ent;
140            byte[] l_data = data;
141            int l_bit_pos = bit_pos;
142    
143            // empty stack if stuff still left
144    
145            int s_size = l_stack.length - l_stackp;
146            if ( s_size > 0 )
147            {
148                int num = ( s_size >= len ) ? len : s_size;
149                System.arraycopy( l_stack, l_stackp, buf, off, num );
150                off += num;
151                len -= num;
152                l_stackp += num;
153            }
154    
155            if ( len == 0 )
156            {
157                stackp = l_stackp;
158                return off - start;
159            }
160    
161            // loop, filling local buffer until enough data has been decompressed
162    
163            main_loop: do
164            {
165                if ( end < EXTRA )
166                    fill();
167    
168                int bit_in = ( got > 0 ) ? ( end - end % l_n_bits ) << 3 : ( end << 3 ) - ( l_n_bits - 1 );
169    
170                while ( l_bit_pos < bit_in )
171                {
172                    // check for code-width expansion
173    
174                    if ( l_free_ent > l_maxcode )
175                    {
176                        int n_bytes = l_n_bits << 3;
177                        l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
178    
179                        l_n_bits++;
180                        l_maxcode = ( l_n_bits == maxbits ) ? l_maxmaxcode : ( 1 << l_n_bits ) - 1;
181    
182                        if ( debug )
183                            System.err.println( "Code-width expanded to " + l_n_bits );
184    
185                        l_bitmask = ( 1 << l_n_bits ) - 1;
186                        l_bit_pos = resetbuf( l_bit_pos );
187                        continue main_loop;
188                    }
189    
190                    // read next code
191    
192                    int pos = l_bit_pos >> 3;
193                    int code =
194                        ( ( ( l_data[pos] & 0xFF ) | ( ( l_data[pos + 1] & 0xFF ) << 8 ) | ( ( l_data[pos + 2] & 0xFF ) << 16 ) ) >> ( l_bit_pos & 0x7 ) )
195                            & l_bitmask;
196                    l_bit_pos += l_n_bits;
197    
198                    // handle first iteration
199    
200                    if ( l_oldcode == -1 )
201                    {
202                        if ( code >= 256 )
203                            throw new IOException( "corrupt input: " + code + " > 255" );
204                        l_finchar = (byte) ( l_oldcode = code );
205                        buf[off++] = l_finchar;
206                        len--;
207                        continue;
208                    }
209    
210                    // handle CLEAR code
211    
212                    if ( code == TBL_CLEAR && block_mode )
213                    {
214                        System.arraycopy( zeros, 0, l_tab_prefix, 0, zeros.length );
215                        l_free_ent = TBL_FIRST - 1;
216    
217                        int n_bytes = l_n_bits << 3;
218                        l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
219                        l_n_bits = INIT_BITS;
220                        l_maxcode = ( 1 << l_n_bits ) - 1;
221                        l_bitmask = l_maxcode;
222    
223                        if ( debug )
224                            System.err.println( "Code tables reset" );
225    
226                        l_bit_pos = resetbuf( l_bit_pos );
227                        continue main_loop;
228                    }
229    
230                    // setup
231    
232                    int incode = code;
233                    l_stackp = l_stack.length;
234    
235                    // Handle KwK case
236    
237                    if ( code >= l_free_ent )
238                    {
239                        if ( code > l_free_ent )
240                            throw new IOException( "corrupt input: code=" + code + ", free_ent=" + l_free_ent );
241    
242                        l_stack[--l_stackp] = l_finchar;
243                        code = l_oldcode;
244                    }
245    
246                    // Generate output characters in reverse order
247    
248                    while ( code >= 256 )
249                    {
250                        l_stack[--l_stackp] = l_tab_suffix[code];
251                        code = l_tab_prefix[code];
252                    }
253                    l_finchar = l_tab_suffix[code];
254                    buf[off++] = l_finchar;
255                    len--;
256    
257                    // And put them out in forward order
258    
259                    s_size = l_stack.length - l_stackp;
260                    int num = ( s_size >= len ) ? len : s_size;
261                    System.arraycopy( l_stack, l_stackp, buf, off, num );
262                    off += num;
263                    len -= num;
264                    l_stackp += num;
265    
266                    // generate new entry in table
267    
268                    if ( l_free_ent < l_maxmaxcode )
269                    {
270                        l_tab_prefix[l_free_ent] = l_oldcode;
271                        l_tab_suffix[l_free_ent] = l_finchar;
272                        l_free_ent++;
273                    }
274    
275                    // Remember previous code
276    
277                    l_oldcode = incode;
278    
279                    // if output buffer full, then return
280    
281                    if ( len == 0 )
282                    {
283                        n_bits = l_n_bits;
284                        maxcode = l_maxcode;
285                        bitmask = l_bitmask;
286                        oldcode = l_oldcode;
287                        finchar = l_finchar;
288                        stackp = l_stackp;
289                        free_ent = l_free_ent;
290                        bit_pos = l_bit_pos;
291    
292                        return off - start;
293                    }
294                }
295    
296                l_bit_pos = resetbuf( l_bit_pos );
297            }
298            while ( got > 0 );
299    
300            n_bits = l_n_bits;
301            maxcode = l_maxcode;
302            bitmask = l_bitmask;
303            oldcode = l_oldcode;
304            finchar = l_finchar;
305            stackp = l_stackp;
306            free_ent = l_free_ent;
307            bit_pos = l_bit_pos;
308    
309            eof = true;
310            return off - start;
311        }
312    
313        /**
314         * Moves the unread data in the buffer to the beginning and resets the pointers.
315         */
316        private final int resetbuf( int bit_pos )
317        {
318            int pos = bit_pos >> 3;
319            System.arraycopy( data, pos, data, 0, end - pos );
320            end -= pos;
321            return 0;
322        }
323    
324        private final void fill()
325            throws IOException
326        {
327            got = in.read( data, end, data.length - 1 - end );
328            if ( got > 0 )
329                end += got;
330        }
331    
332        public synchronized long skip( long num )
333            throws IOException
334        {
335            byte[] tmp = new byte[(int) num];
336            int got = read( tmp, 0, (int) num );
337    
338            if ( got > 0 )
339                return (long) got;
340            else
341                return 0L;
342        }
343    
344        public synchronized int available()
345            throws IOException
346        {
347            if ( eof )
348                return 0;
349    
350            return in.available();
351        }
352    
353        private static final int LZW_MAGIC = 0x1f9d;
354    
355        private static final int MAX_BITS = 16;
356    
357        private static final int INIT_BITS = 9;
358    
359        private static final int HDR_MAXBITS = 0x1f;
360    
361        private static final int HDR_EXTENDED = 0x20;
362    
363        private static final int HDR_FREE = 0x40;
364    
365        private static final int HDR_BLOCK_MODE = 0x80;
366    
367        private void parse_header()
368            throws IOException
369        {
370            // read in and check magic number
371    
372            int t = in.read();
373            if ( t < 0 )
374                throw new EOFException( "Failed to read magic number" );
375            int magic = ( t & 0xff ) << 8;
376            t = in.read();
377            if ( t < 0 )
378                throw new EOFException( "Failed to read magic number" );
379            magic += t & 0xff;
380            if ( magic != LZW_MAGIC )
381                throw new IOException( "Input not in compress format (read " + "magic number 0x"
382                    + Integer.toHexString( magic ) + ")" );
383    
384            // read in header byte
385    
386            int header = in.read();
387            if ( header < 0 )
388                throw new EOFException( "Failed to read header" );
389    
390            block_mode = ( header & HDR_BLOCK_MODE ) > 0;
391            maxbits = header & HDR_MAXBITS;
392    
393            if ( maxbits > MAX_BITS )
394                throw new IOException( "Stream compressed with " + maxbits + " bits, but can only handle " + MAX_BITS
395                    + " bits" );
396    
397            if ( ( header & HDR_EXTENDED ) > 0 )
398                throw new IOException( "Header extension bit set" );
399    
400            if ( ( header & HDR_FREE ) > 0 )
401                throw new IOException( "Header bit 6 set" );
402    
403            if ( debug )
404            {
405                System.err.println( "block mode: " + block_mode );
406                System.err.println( "max bits:   " + maxbits );
407            }
408    
409            // initialize stuff
410    
411            maxmaxcode = 1 << maxbits;
412            n_bits = INIT_BITS;
413            maxcode = ( 1 << n_bits ) - 1;
414            bitmask = maxcode;
415            oldcode = -1;
416            finchar = 0;
417            free_ent = block_mode ? TBL_FIRST : 256;
418    
419            tab_prefix = new int[1 << maxbits];
420            tab_suffix = new byte[1 << maxbits];
421            stack = new byte[1 << maxbits];
422            stackp = stack.length;
423    
424            for ( int idx = 255; idx >= 0; idx-- )
425                tab_suffix[idx] = (byte) idx;
426        }
427    
428        private static final boolean debug = false;
429    
430        public static void main( String args[] )
431            throws Exception
432        {
433            if ( args.length != 1 )
434            {
435                System.err.println( "Usage: UncompressInputStream <file>" );
436                System.exit( 1 );
437            }
438    
439            InputStream in = new UncompressInputStream( new FileInputStream( args[0] ) );
440    
441            byte[] buf = new byte[100000];
442            int tot = 0;
443            long beg = System.currentTimeMillis();
444    
445            while ( true )
446            {
447                int got = in.read( buf );
448                if ( got < 0 )
449                    break;
450                System.out.write( buf, 0, got );
451                tot += got;
452            }
453    
454            long end = System.currentTimeMillis();
455            System.err.println( "Decompressed " + tot + " bytes" );
456            System.err.println( "Time: " + ( end - beg ) / 1000. + " seconds" );
457        }
458    }