/* gnuboy-for-dfi/inflate.c
 *
 * 513 lines
 * 15 KiB
 * C */

/* Slightly modified from its original form so as not to exit the
* program on errors. The resulting file remains in the public
* domain for all to use. */
/* --- GZIP file format uncompression routines --- */
/* The following routines (notably the unzip() function below)
* uncompress gzipped data. They are terribly slow at the task, but
* it is presumed that they work reasonably well. They don't do any
* error checking, but they're probably not too vulnerable to buggy
* data either. Another important limitation (though it would be pretty
* easy to get around) is that the data must reside in memory; it is
* not read as a stream. They have been tested very little. Anyway,
* whatever these functions are good for, I put them in the public
* domain. -- David Madore <david.madore@ens.fr> 1999/11/21 */
static unsigned int
peek_bits (const unsigned char *data, long p, int q)
     /* Return the q-bit field whose least significant bit is bit
      * number p of data.  Within a byte, bit 0 is the least
      * significant bit, and bytes are visited in increasing address
      * order, so the field is assembled little-endian.  The position
      * p is not modified.  When q <= 0 nothing is read and 0 is
      * returned. */
{
  unsigned int value = 0;
  int have = 0;         /* Bits of the field gathered so far */

  while ( have < q )
    {
      long pos = p + have;              /* Absolute index of the next bit */
      int shift = (int) (pos % 8);      /* Its offset inside its byte */
      int take = 8 - shift;             /* Bits available in that byte */
      unsigned int chunk;

      /* Never take more than what is still missing from the field. */
      if ( take > q - have )
        take = q - have;
      chunk = (unsigned int) (data[pos / 8] >> shift) & ((1U << take) - 1);
      value |= chunk << have;
      have += take;
    }
  return value;
}
static unsigned int
read_bits (const unsigned char *data, long *p, int q)
     /* Consume q bits: return the same value peek_bits() yields for
      * the current bit position *p, and move *p forward by q. */
{
  const long start = *p;

  *p = start + q;
  return peek_bits (data, start, q);
}
static void
make_code_table (const char size_table[], int table_length,
                 unsigned int code_table[], int maxbits)
     /* Assign Huffman codes from code lengths (see rfc1951, section
      * 3.2.2).  size_table[s] is the code length of symbol s and
      * code_table[s] receives its code; both arrays hold table_length
      * entries.  Codes are handed out shortest length first and, for
      * equal lengths, in increasing symbol order.  Entries whose
      * length is 0 or exceeds maxbits are left untouched in
      * code_table. */
{
  unsigned int next = 0;        /* Next unassigned code of the current length */
  int len;

  /* Moving on to the next length appends a zero bit to the running
   * code, hence the shift in the loop step. */
  for ( len=1 ; len<=maxbits ; len++, next<<=1 )
    {
      int sym;

      for ( sym=0 ; sym<table_length ; sym++ )
        {
          if ( size_table[sym] != len )
            continue;
          code_table[sym] = next;
          next++;
        }
    }
}
static int
decode_one (const unsigned char *data, long *p,
            const char size_table[], int table_length,
            const unsigned int code_table[], int maxbits)
     /* Decode one Huffman symbol starting at bit *p of data.
      * size_table and code_table (table_length entries each) describe
      * the code.  On success the symbol index is returned and *p is
      * advanced by that symbol's code length.  If no symbol matches,
      * -1 is returned and *p is left unchanged. */
{
  unsigned int window = 0;
  int bit, sym;

  /* Huffman codes are stored most significant bit first, so collect
   * maxbits bits with the earliest bit ending up highest. */
  for ( bit=0 ; bit<maxbits ; bit++ )
    window = (window << 1) | peek_bits (data, *p + bit, 1);

  /* Linear search: the first symbol whose code equals the leading
   * bits of the window wins. */
  for ( sym=0 ; sym<table_length ; sym++ )
    {
      int len = size_table[sym];

      if ( len == 0 )
        continue;               /* Symbol has no code */
      if ( (window >> (maxbits - len)) != code_table[sym] )
        continue;
      *p += len;
      return sym;
    }
  return -1;
}
/* Upper bounds on the code length in each of the three alphabets,
 * passed as maxbits to make_code_table() and decode_one(). The
 * original author was unsure what rfc1951 requires here (it only
 * mentions the limits in the last paragraph of section 3.2.1) and
 * picked 15 as almost certainly safe and the largest value compatible
 * with the integer sizes guaranteed by the C standard.
 * NOTE(review): in this file code-length-alphabet lengths are read
 * from 3-bit fields (so at most 7) and the other lengths are only
 * stored when the decoded symbol is below 16 (so at most 15), hence
 * 15 covers every length the decoder can produce. */
#define CLEN_MAXBITS 15
#define HLIT_MAXBITS 15
#define HDIST_MAXBITS 15
/* Alphabet sizes: 19 code-length symbols (0-18), 288 literal/length
 * symbols and 30 distance symbols. These are also the dimensions of
 * the corresponding size and code tables. */
#define CLEN_TSIZE 19
#define HLIT_TSIZE 288
#define HDIST_TSIZE 30
static int
get_tables (const unsigned char *data, long *p,
            char hlit_size_table[HLIT_TSIZE],
            unsigned int hlit_code_table[HLIT_TSIZE],
            char hdist_size_table[HDIST_TSIZE],
            unsigned int hdist_code_table[HDIST_TSIZE])
     /* Read the header of a dynamic-Huffman block (rfc1951, section
      * 3.2.7) starting at bit *p of data, and fill the literal/length
      * and distance tables (code lengths and codes).  *p is advanced
      * past the header.
      *
      * The literal/length and distance code lengths form ONE sequence
      * in the stream: a repeat code may start in the literal/length
      * part and finish in the distance part, and a "repeat previous"
      * at the start of the distance part refers to the last
      * literal/length length.  They are therefore decoded into a
      * single array and split afterwards.
      *
      * Returns 0 on success, or -1 if a symbol cannot be decoded, if
      * a "repeat previous" code has no previous length, or if the
      * announced table sizes do not fit the tables. */
{
  /* Order in which the code-length code lengths are transmitted. */
  static const int clen_weird_tangle[CLEN_TSIZE]
    = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
  char clen_size_table[CLEN_TSIZE];
  unsigned int clen_code_table[CLEN_TSIZE];
  char lengths[HLIT_TSIZE + HDIST_TSIZE];       /* Combined length sequence */
  int nlit, ndist, nclen, total;
  int j;

  nlit = 257 + (int) read_bits (data, p, 5);    /* Literal/length codes */
  ndist = 1 + (int) read_bits (data, p, 5);     /* Distance codes */
  nclen = 4 + (int) read_bits (data, p, 4);     /* Code-length codes */

  /* The 5-bit distance count can announce up to 32 codes, but the
   * distance tables only hold HDIST_TSIZE entries. */
  if ( nlit > HLIT_TSIZE || ndist > HDIST_TSIZE )
    return -1;

  /* Code-length alphabet: 3-bit lengths in the tangled order, with
   * the lengths that were not transmitted set to zero. */
  for ( j=0 ; j<nclen ; j++ )
    clen_size_table[clen_weird_tangle[j]] = (char) read_bits (data, p, 3);
  for ( ; j<CLEN_TSIZE ; j++ )
    clen_size_table[clen_weird_tangle[j]] = 0;
  make_code_table (clen_size_table, CLEN_TSIZE,
                   clen_code_table, CLEN_MAXBITS);

  /* Decode the nlit+ndist code lengths as one run. */
  total = nlit + ndist;
  j = 0;
  while ( j < total )
    {
      int sym, repeat;
      char value;

      /* Keep the result signed so that the -1 failure value is
       * actually detected. */
      sym = decode_one (data, p, clen_size_table, CLEN_TSIZE,
                        clen_code_table, CLEN_MAXBITS);
      if ( sym < 0 )
        return -1;
      if ( sym < 16 )
        {
          /* A literal code length. */
          lengths[j++] = (char) sym;
          continue;
        }
      switch ( sym )
        {
        case 16:                /* Repeat the previous length 3-6 times */
          if ( j == 0 )
            return -1;          /* Nothing to repeat */
          repeat = 3 + (int) read_bits (data, p, 2);
          value = lengths[j-1];
          break;
        case 17:                /* 3-10 zeros */
          repeat = 3 + (int) read_bits (data, p, 3);
          value = 0;
          break;
        case 18:                /* 11-138 zeros */
          repeat = 11 + (int) read_bits (data, p, 7);
          value = 0;
          break;
        default:
          return -1;
        }
      /* A run that would go past the last announced length is cut
       * short rather than written out of bounds. */
      if ( repeat > total - j )
        repeat = total - j;
      while ( repeat-- > 0 )
        lengths[j++] = value;
    }

  /* Split the sequence into the two tables, zero-filling the symbols
   * that were not announced, and derive the codes. */
  for ( j=0 ; j<nlit ; j++ )
    hlit_size_table[j] = lengths[j];
  for ( ; j<HLIT_TSIZE ; j++ )
    hlit_size_table[j] = 0;
  make_code_table (hlit_size_table, HLIT_TSIZE,
                   hlit_code_table, HLIT_MAXBITS);

  for ( j=0 ; j<ndist ; j++ )
    hdist_size_table[j] = lengths[nlit+j];
  for ( ; j<HDIST_TSIZE ; j++ )
    hdist_size_table[j] = 0;
  make_code_table (hdist_size_table, HDIST_TSIZE,
                   hdist_code_table, HDIST_MAXBITS);
  return 0;
}
/* The (circular) output buffer. Every byte produced is stored here by
 * pushout() so that back references can be resolved by pushin(). */
/* Minimal buffer size. Also the only useful value: the largest
 * distance get_data() can produce is 24577 + 8191 = 32768. */
#define BUFFER_SIZE 32768
/* Index of the slot the next pushout() will write; wraps around to 0
 * on reaching BUFFER_SIZE. */
static unsigned int buffer_ptr = 0;
/* The buffer itself. File-scope state, shared by every call into this
 * file; nothing in the code shown here resets it. */
static unsigned char buffer[BUFFER_SIZE];
static void
pushout (unsigned char ch)
     /* Record one output byte in the circular buffer so that later
      * back references can fetch it, then step the write index,
      * wrapping at BUFFER_SIZE. */
{
  buffer[buffer_ptr] = ch;
  buffer_ptr = (buffer_ptr + 1) % BUFFER_SIZE;
}
static unsigned char
pushin (unsigned int dist)
     /* Fetch the byte stored dist positions behind the current write
      * index of the circular buffer (dist == 1 is the byte most
      * recently stored by pushout()). */
{
  /* Add BUFFER_SIZE before subtracting so the index wraps around the
   * start of the buffer. */
  unsigned int slot = (buffer_ptr + BUFFER_SIZE - dist) % BUFFER_SIZE;

  return buffer[slot];
}
static int
get_data (const unsigned char *data, long *p,
          const char hlit_size_table[HLIT_TSIZE],
          const unsigned int hlit_code_table[HLIT_TSIZE],
          const char hdist_size_table[HDIST_TSIZE],
          const unsigned int hdist_code_table[HDIST_TSIZE],
          void (* callback) (unsigned char d))
     /* Decode the body of one Huffman-compressed block starting at
      * bit *p of data, using the given literal/length and distance
      * tables.  Every byte produced is stored with pushout() and
      * handed to callback.  Returns 0 when the end-of-block symbol
      * (256) is reached, or -1 if a literal/length or distance symbol
      * cannot be decoded or is outside the ranges handled below. */
{
  /* Base match length and number of extra bits for the length
   * symbols 257..285 (indexed by symbol - 257). */
  static const unsigned short length_base[29] = {
    3, 4, 5, 6, 7, 8, 9, 10,
    11, 13, 15, 17,
    19, 23, 27, 31,
    35, 43, 51, 59,
    67, 83, 99, 115,
    131, 163, 195, 227,
    258
  };
  static const unsigned char length_extra[29] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1,
    2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5, 5,
    0
  };
  /* Base distance and number of extra bits for the distance symbols
   * 0..29. */
  static const unsigned short dist_base[30] = {
    1, 2, 3, 4, 5, 7, 9, 13, 17, 25,
    33, 49, 65, 97, 129, 193, 257, 385, 513, 769,
    1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
  };
  static const unsigned char dist_extra[30] = {
    0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
    4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
    9, 9, 10, 10, 11, 11, 12, 12, 13, 13
  };

  for ( ; ; )
    {
      int sym, dsym;
      unsigned int length, dist, n;

      sym = decode_one (data, p, hlit_size_table, HLIT_TSIZE,
                        hlit_code_table, HLIT_MAXBITS);
      if ( sym < 0 )
        return -1;
      if ( sym < 256 )
        {
          /* Literal */
          pushout ((unsigned char) sym);
          callback ((unsigned char) sym);
          continue;
        }
      if ( sym == 256 )
        return 0;               /* End of block */
      if ( sym > 285 )
        return -1;              /* Not a length symbol handled here */

      /* Back reference: the length (plus its extra bits) comes first
       * in the stream, then the distance symbol and its extra bits. */
      length = length_base[sym - 257];
      if ( length_extra[sym - 257] )
        length += read_bits (data, p, length_extra[sym - 257]);

      dsym = decode_one (data, p, hdist_size_table, HDIST_TSIZE,
                         hdist_code_table, HDIST_MAXBITS);
      if ( dsym < 0 || dsym > 29 )
        return -1;
      dist = dist_base[dsym];
      if ( dist_extra[dsym] )
        dist += read_bits (data, p, dist_extra[dsym]);

      /* Copy byte by byte: each copied byte is stored before the next
       * one is fetched, so the source and destination may overlap. */
      for ( n=0 ; n<length ; n++ )
        {
          unsigned char ch = pushin (dist);

          pushout (ch);
          callback (ch);
        }
    }
}
static int
inflate (const unsigned char *data, long *p,
         void (* callback) (unsigned char d))
     /* Main uncompression function for the deflate method.  Decodes
      * consecutive blocks starting at bit *p of data until a block
      * whose "last block" bit is set has been processed, passing every
      * output byte to callback.  *p is advanced past what was
      * consumed.  Returns 0 on success and -1 on a reserved block
      * type, a stored block whose LEN and NLEN fields disagree, or a
      * failure reported by get_tables() or get_data(). */
{
  char hlit_size_table[HLIT_TSIZE];
  unsigned int hlit_code_table[HLIT_TSIZE];
  char hdist_size_table[HDIST_TSIZE];
  unsigned int hdist_code_table[HDIST_TSIZE];
  unsigned int blast, btype;

  do
    {
      blast = read_bits (data, p, 1);   /* Set on the final block */
      btype = read_bits (data, p, 2);

      if ( btype == 0 )
        /* Non compressed block */
        {
          unsigned int len, nlen;
          unsigned int l;

          /* Jump to the next byte boundary.  *p counts bits, so the
           * rounded-up byte index must be converted back to a bit
           * position. */
          *p = ((*p + 7) / 8) * 8;
          len = read_bits (data, p, 16);
          nlen = read_bits (data, p, 16);
          /* NLEN is the one's complement of LEN (rfc1951, section
           * 3.2.4); anything else means the stream is corrupt. */
          if ( (len ^ nlen) != 0xffffU )
            return -1;
          for ( l=0 ; l<len ; l++ )
            {
              unsigned char b = (unsigned char) read_bits (data, p, 8);

              pushout (b);
              callback (b);
            }
        }
      else if ( btype == 1 || btype == 2 )
        {
          if ( btype == 2 )
            {
              /* Dynamic Huffman tables */
              if ( get_tables (data, p,
                               hlit_size_table, hlit_code_table,
                               hdist_size_table, hdist_code_table) < 0 )
                return -1;
            }
          else
            /* Fixed Huffman codes */
            {
              int j;

              for ( j=0 ; j<144 ; j++ )
                hlit_size_table[j] = 8;
              for ( ; j<256 ; j++ )
                hlit_size_table[j] = 9;
              for ( ; j<280 ; j++ )
                hlit_size_table[j] = 7;
              for ( ; j<HLIT_TSIZE ; j++ )
                hlit_size_table[j] = 8;
              make_code_table (hlit_size_table, HLIT_TSIZE,
                               hlit_code_table, HLIT_MAXBITS);
              for ( j=0 ; j<HDIST_TSIZE ; j++ )
                hdist_size_table[j] = 5;
              make_code_table (hdist_size_table, HDIST_TSIZE,
                               hdist_code_table, HDIST_MAXBITS);
            }
          if ( get_data (data, p,
                         hlit_size_table, hlit_code_table,
                         hdist_size_table, hdist_code_table,
                         callback) < 0 )
            return -1;
        }
      else
        {
          /* Block type 3 is not handled. */
          return -1;
        }
    }
  while ( ! blast );
  return 0;
}
int
unzip (const unsigned char *data, long *p,
       void (* callback) (unsigned char d))
     /* Uncompress gzipped data held in memory.  data points to the
      * gzip data; p points to a long holding the bit offset at which
      * to start (normally 0) and is advanced as input is consumed;
      * callback is invoked once for every uncompressed byte, in
      * order.  Returns -1 if the two magic bytes or the compression
      * method byte are not the expected values, otherwise whatever
      * inflate() returns. */
{
  /* Bits of the flags byte that announce optional header parts. */
  enum
    {
      FLAG_HEADER_CRC = 0x2,
      FLAG_EXTRA = 0x4,
      FLAG_NAME = 0x8,
      FLAG_COMMENT = 0x10
    };
  unsigned int flags;

  /* Magic bytes 0x1f 0x8b, then the compression method, which must be
   * 8. */
  if ( read_bits (data, p, 8) != 0x1f )
    return -1;
  if ( read_bits (data, p, 8) != 0x8b )
    return -1;
  if ( read_bits (data, p, 8) != 0x8 )
    return -1;

  /* Flag bits 0xe0 are unknown; they are deliberately not treated as
   * an error. */
  flags = read_bits (data, p, 8);

  (void) read_bits (data, p, 32);       /* Ignore modification time */
  (void) read_bits (data, p, 8);        /* Ignore extra flags */
  (void) read_bits (data, p, 8);        /* Ignore OS type */

  if ( flags & FLAG_EXTRA )
    {
      /* Skip over extra data: a 16-bit byte count, then that many
       * bytes. */
      long xlen = (long) read_bits (data, p, 16);

      *p += xlen * 8;
    }
  if ( flags & FLAG_NAME )
    {
      /* Skip over the zero-terminated file name */
      while ( read_bits (data, p, 8) != 0 )
        ;
    }
  if ( flags & FLAG_COMMENT )
    {
      /* Skip over the zero-terminated comment */
      while ( read_bits (data, p, 8) != 0 )
        ;
    }
  if ( flags & FLAG_HEADER_CRC )
    (void) read_bits (data, p, 16);     /* Ignore CRC16 */

  /* CRC32 and ISIZE are at the end. We don't even bother to look at
   * them. */
  return inflate (data, p, callback);
}