001package net.sf.logdistiller.util;
002
003/*
004 * @(#)UncompressInputStream.java           0.3-3 06/05/2001
005 *
006 *  This file is part of the HTTPClient package
007 *  Copyright (C) 1996-2001 Ronald Tschal�r
008 *
009 *  This library is free software; you can redistribute it and/or
010 *  modify it under the terms of the GNU Lesser General Public
011 *  License as published by the Free Software Foundation; either
012 *  version 2 of the License, or (at your option) any later version.
013 *
014 *  This library is distributed in the hope that it will be useful,
015 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
016 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
017 *  Lesser General Public License for more details.
018 *
019 *  You should have received a copy of the GNU Lesser General Public
020 *  License along with this library; if not, write to the Free
021 *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
022 *  MA 02111-1307, USA
023 *
024 *  For questions, suggestions, bug-reports, enhancement-requests etc.
025 *  I may be contacted at:
026 *
027 *  ronald@innovation.ch
028 *
029 *  The HTTPClient's home page is located at:
030 *
031 *  http://www.innovation.ch/java/HTTPClient/
032 *
033 */
034
035//package HTTPClient;
036import java.io.IOException;
037import java.io.EOFException;
038import java.io.InputStream;
039import java.io.FileInputStream;
040import java.io.FilterInputStream;
041
042/**
043 * This class decompresses an input stream containing data compressed with the unix "compress" utility (LZC, a LZW
044 * variant). This code is based heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written by Peter
045 * Jannesen) and the original compress code.
046 *
047 * @version 0.3-3 06/05/2001
048 * @author Ronald Tschal�r
049 */
050public class UncompressInputStream
051    extends FilterInputStream
052{
053    /**
054     * @param is the input stream to decompress
055     * @exception IOException if the header is malformed
056     */
057    public UncompressInputStream( InputStream is )
058        throws IOException
059    {
060        super( is );
061        parse_header();
062    }
063
064    byte[] one = new byte[1];
065
066    public synchronized int read()
067        throws IOException
068    {
069        int b = in.read( one, 0, 1 );
070        if ( b == 1 )
071            return ( one[0] & 0xff );
072        else
073            return -1;
074    }
075
076    // string table stuff
077    private static final int TBL_CLEAR = 0x100;
078
079    private static final int TBL_FIRST = TBL_CLEAR + 1;
080
081    private int[] tab_prefix;
082
083    private byte[] tab_suffix;
084
085    private int[] zeros = new int[256];
086
087    private byte[] stack;
088
089    // various state
090    private boolean block_mode;
091
092    private int n_bits;
093
094    private int maxbits;
095
096    private int maxmaxcode;
097
098    private int maxcode;
099
100    private int bitmask;
101
102    private int oldcode;
103
104    private byte finchar;
105
106    private int stackp;
107
108    private int free_ent;
109
110    // input buffer
111    private byte[] data = new byte[10000];
112
113    private int bit_pos = 0, end = 0, got = 0;
114
115    private boolean eof = false;
116
117    private static final int EXTRA = 64;
118
119    public synchronized int read( byte[] buf, int off, int len )
120        throws IOException
121    {
122        if ( eof )
123            return -1;
124        int start = off;
125
126        /*
127         * Using local copies of various variables speeds things up by as much as 30% !
128         */
129        int[] l_tab_prefix = tab_prefix;
130        byte[] l_tab_suffix = tab_suffix;
131        byte[] l_stack = stack;
132        int l_n_bits = n_bits;
133        int l_maxcode = maxcode;
134        int l_maxmaxcode = maxmaxcode;
135        int l_bitmask = bitmask;
136        int l_oldcode = oldcode;
137        byte l_finchar = finchar;
138        int l_stackp = stackp;
139        int l_free_ent = free_ent;
140        byte[] l_data = data;
141        int l_bit_pos = bit_pos;
142
143        // empty stack if stuff still left
144
145        int s_size = l_stack.length - l_stackp;
146        if ( s_size > 0 )
147        {
148            int num = ( s_size >= len ) ? len : s_size;
149            System.arraycopy( l_stack, l_stackp, buf, off, num );
150            off += num;
151            len -= num;
152            l_stackp += num;
153        }
154
155        if ( len == 0 )
156        {
157            stackp = l_stackp;
158            return off - start;
159        }
160
161        // loop, filling local buffer until enough data has been decompressed
162
163        main_loop: do
164        {
165            if ( end < EXTRA )
166                fill();
167
168            int bit_in = ( got > 0 ) ? ( end - end % l_n_bits ) << 3 : ( end << 3 ) - ( l_n_bits - 1 );
169
170            while ( l_bit_pos < bit_in )
171            {
172                // check for code-width expansion
173
174                if ( l_free_ent > l_maxcode )
175                {
176                    int n_bytes = l_n_bits << 3;
177                    l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
178
179                    l_n_bits++;
180                    l_maxcode = ( l_n_bits == maxbits ) ? l_maxmaxcode : ( 1 << l_n_bits ) - 1;
181
182                    if ( debug )
183                        System.err.println( "Code-width expanded to " + l_n_bits );
184
185                    l_bitmask = ( 1 << l_n_bits ) - 1;
186                    l_bit_pos = resetbuf( l_bit_pos );
187                    continue main_loop;
188                }
189
190                // read next code
191
192                int pos = l_bit_pos >> 3;
193                int code =
194                    ( ( ( l_data[pos] & 0xFF ) | ( ( l_data[pos + 1] & 0xFF ) << 8 ) | ( ( l_data[pos + 2] & 0xFF ) << 16 ) ) >> ( l_bit_pos & 0x7 ) )
195                        & l_bitmask;
196                l_bit_pos += l_n_bits;
197
198                // handle first iteration
199
200                if ( l_oldcode == -1 )
201                {
202                    if ( code >= 256 )
203                        throw new IOException( "corrupt input: " + code + " > 255" );
204                    l_finchar = (byte) ( l_oldcode = code );
205                    buf[off++] = l_finchar;
206                    len--;
207                    continue;
208                }
209
210                // handle CLEAR code
211
212                if ( code == TBL_CLEAR && block_mode )
213                {
214                    System.arraycopy( zeros, 0, l_tab_prefix, 0, zeros.length );
215                    l_free_ent = TBL_FIRST - 1;
216
217                    int n_bytes = l_n_bits << 3;
218                    l_bit_pos = ( l_bit_pos - 1 ) + n_bytes - ( l_bit_pos - 1 + n_bytes ) % n_bytes;
219                    l_n_bits = INIT_BITS;
220                    l_maxcode = ( 1 << l_n_bits ) - 1;
221                    l_bitmask = l_maxcode;
222
223                    if ( debug )
224                        System.err.println( "Code tables reset" );
225
226                    l_bit_pos = resetbuf( l_bit_pos );
227                    continue main_loop;
228                }
229
230                // setup
231
232                int incode = code;
233                l_stackp = l_stack.length;
234
235                // Handle KwK case
236
237                if ( code >= l_free_ent )
238                {
239                    if ( code > l_free_ent )
240                        throw new IOException( "corrupt input: code=" + code + ", free_ent=" + l_free_ent );
241
242                    l_stack[--l_stackp] = l_finchar;
243                    code = l_oldcode;
244                }
245
246                // Generate output characters in reverse order
247
248                while ( code >= 256 )
249                {
250                    l_stack[--l_stackp] = l_tab_suffix[code];
251                    code = l_tab_prefix[code];
252                }
253                l_finchar = l_tab_suffix[code];
254                buf[off++] = l_finchar;
255                len--;
256
257                // And put them out in forward order
258
259                s_size = l_stack.length - l_stackp;
260                int num = ( s_size >= len ) ? len : s_size;
261                System.arraycopy( l_stack, l_stackp, buf, off, num );
262                off += num;
263                len -= num;
264                l_stackp += num;
265
266                // generate new entry in table
267
268                if ( l_free_ent < l_maxmaxcode )
269                {
270                    l_tab_prefix[l_free_ent] = l_oldcode;
271                    l_tab_suffix[l_free_ent] = l_finchar;
272                    l_free_ent++;
273                }
274
275                // Remember previous code
276
277                l_oldcode = incode;
278
279                // if output buffer full, then return
280
281                if ( len == 0 )
282                {
283                    n_bits = l_n_bits;
284                    maxcode = l_maxcode;
285                    bitmask = l_bitmask;
286                    oldcode = l_oldcode;
287                    finchar = l_finchar;
288                    stackp = l_stackp;
289                    free_ent = l_free_ent;
290                    bit_pos = l_bit_pos;
291
292                    return off - start;
293                }
294            }
295
296            l_bit_pos = resetbuf( l_bit_pos );
297        }
298        while ( got > 0 );
299
300        n_bits = l_n_bits;
301        maxcode = l_maxcode;
302        bitmask = l_bitmask;
303        oldcode = l_oldcode;
304        finchar = l_finchar;
305        stackp = l_stackp;
306        free_ent = l_free_ent;
307        bit_pos = l_bit_pos;
308
309        eof = true;
310        return off - start;
311    }
312
313    /**
314     * Moves the unread data in the buffer to the beginning and resets the pointers.
315     */
316    private final int resetbuf( int bit_pos )
317    {
318        int pos = bit_pos >> 3;
319        System.arraycopy( data, pos, data, 0, end - pos );
320        end -= pos;
321        return 0;
322    }
323
324    private final void fill()
325        throws IOException
326    {
327        got = in.read( data, end, data.length - 1 - end );
328        if ( got > 0 )
329            end += got;
330    }
331
332    public synchronized long skip( long num )
333        throws IOException
334    {
335        byte[] tmp = new byte[(int) num];
336        int got = read( tmp, 0, (int) num );
337
338        if ( got > 0 )
339            return (long) got;
340        else
341            return 0L;
342    }
343
344    public synchronized int available()
345        throws IOException
346    {
347        if ( eof )
348            return 0;
349
350        return in.available();
351    }
352
353    private static final int LZW_MAGIC = 0x1f9d;
354
355    private static final int MAX_BITS = 16;
356
357    private static final int INIT_BITS = 9;
358
359    private static final int HDR_MAXBITS = 0x1f;
360
361    private static final int HDR_EXTENDED = 0x20;
362
363    private static final int HDR_FREE = 0x40;
364
365    private static final int HDR_BLOCK_MODE = 0x80;
366
367    private void parse_header()
368        throws IOException
369    {
370        // read in and check magic number
371
372        int t = in.read();
373        if ( t < 0 )
374            throw new EOFException( "Failed to read magic number" );
375        int magic = ( t & 0xff ) << 8;
376        t = in.read();
377        if ( t < 0 )
378            throw new EOFException( "Failed to read magic number" );
379        magic += t & 0xff;
380        if ( magic != LZW_MAGIC )
381            throw new IOException( "Input not in compress format (read " + "magic number 0x"
382                + Integer.toHexString( magic ) + ")" );
383
384        // read in header byte
385
386        int header = in.read();
387        if ( header < 0 )
388            throw new EOFException( "Failed to read header" );
389
390        block_mode = ( header & HDR_BLOCK_MODE ) > 0;
391        maxbits = header & HDR_MAXBITS;
392
393        if ( maxbits > MAX_BITS )
394            throw new IOException( "Stream compressed with " + maxbits + " bits, but can only handle " + MAX_BITS
395                + " bits" );
396
397        if ( ( header & HDR_EXTENDED ) > 0 )
398            throw new IOException( "Header extension bit set" );
399
400        if ( ( header & HDR_FREE ) > 0 )
401            throw new IOException( "Header bit 6 set" );
402
403        if ( debug )
404        {
405            System.err.println( "block mode: " + block_mode );
406            System.err.println( "max bits:   " + maxbits );
407        }
408
409        // initialize stuff
410
411        maxmaxcode = 1 << maxbits;
412        n_bits = INIT_BITS;
413        maxcode = ( 1 << n_bits ) - 1;
414        bitmask = maxcode;
415        oldcode = -1;
416        finchar = 0;
417        free_ent = block_mode ? TBL_FIRST : 256;
418
419        tab_prefix = new int[1 << maxbits];
420        tab_suffix = new byte[1 << maxbits];
421        stack = new byte[1 << maxbits];
422        stackp = stack.length;
423
424        for ( int idx = 255; idx >= 0; idx-- )
425            tab_suffix[idx] = (byte) idx;
426    }
427
428    private static final boolean debug = false;
429
430    public static void main( String args[] )
431        throws Exception
432    {
433        if ( args.length != 1 )
434        {
435            System.err.println( "Usage: UncompressInputStream <file>" );
436            System.exit( 1 );
437        }
438
439        InputStream in = new UncompressInputStream( new FileInputStream( args[0] ) );
440
441        byte[] buf = new byte[100000];
442        int tot = 0;
443        long beg = System.currentTimeMillis();
444
445        while ( true )
446        {
447            int got = in.read( buf );
448            if ( got < 0 )
449                break;
450            System.out.write( buf, 0, got );
451            tot += got;
452        }
453
454        long end = System.currentTimeMillis();
455        System.err.println( "Decompressed " + tot + " bytes" );
456        System.err.println( "Time: " + ( end - beg ) / 1000. + " seconds" );
457    }
458}