245 lines
13 KiB
C#
245 lines
13 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
|
|
namespace UniversalEditor.Compression.LZX
|
|
{
|
|
class LZXCompressionModule
|
|
{
|
|
/***************************************************************************
|
|
* lzx.c - LZX decompression routines *
|
|
* ------------------- *
|
|
* *
|
|
* maintainer: Jed Wing <jedwin@ugcs.caltech.edu> *
|
|
* source: modified lzx.c from cabextract v0.5 *
|
|
* notes: This file was taken from cabextract v0.5, which was, *
|
|
* itself, a modified version of the lzx decompression code *
|
|
* from unlzx. *
|
|
* *
|
|
* platforms: In its current incarnation, this file has been tested on *
|
|
* two different Linux platforms (one, redhat-based, with a *
|
|
* 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with *
|
|
* 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were *
|
|
* Intel x86 compatible machines. *
|
|
***************************************************************************/
|
|
|
|
/***************************************************************************
|
|
*
|
|
* Copyright(C) Stuart Caie
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
|
*
|
|
***************************************************************************/
|
|
|
|
public class Lzx
|
|
{
|
|
const int LZX_MIN_MATCH = (2);
|
|
const int LZX_MAX_MATCH = (257);
|
|
const int LZX_NUM_CHARS = (256);
|
|
const int LZX_BLOCKTYPE_INVALID = (0); /* also blocktypes 4-7 invalid */
|
|
const int LZX_BLOCKTYPE_VERBATIM = (1);
|
|
const int LZX_BLOCKTYPE_ALIGNED = (2);
|
|
const int LZX_BLOCKTYPE_UNCOMPRESSED = (3);
|
|
const int LZX_PRETREE_NUM_ELEMENTS = (20);
|
|
const int LZX_ALIGNED_NUM_ELEMENTS = (8); /* aligned offset tree #elements */
|
|
const int LZX_NUM_PRIMARY_LENGTHS = (7); /* this one missing from spec! */
|
|
const int LZX_NUM_SECONDARY_LENGTHS = (249); /* length tree #elements */
|
|
|
|
const int LZX_PRETREE_MAXSYMBOLS = (LZX_PRETREE_NUM_ELEMENTS);
|
|
const int LZX_PRETREE_TABLEBITS = (6);
|
|
const int LZX_MAINTREE_MAXSYMBOLS = (LZX_NUM_CHARS + 50 * 8);
|
|
const int LZX_MAINTREE_TABLEBITS = (12);
|
|
const int LZX_LENGTH_MAXSYMBOLS = (LZX_NUM_SECONDARY_LENGTHS + 1);
|
|
const int LZX_LENGTH_TABLEBITS = (12);
|
|
const int LZX_ALIGNED_MAXSYMBOLS = (LZX_ALIGNED_NUM_ELEMENTS);
|
|
const int LZX_ALIGNED_TABLEBITS = (7);
|
|
|
|
const int LZX_LENTABLE_SAFETY = (64);
|
|
|
|
public struct LZXstate
|
|
{
|
|
byte[] window; /* the actual decoding window */
|
|
uint window_size; /* window size (32Kb through 2Mb) */
|
|
uint actual_size; /* window size when it was first allocated */
|
|
uint window_posn; /* current offset within the window */
|
|
uint R0, R1, R2; /* for the LRU offset system */
|
|
ushort main_elements; /* number of main tree elements */
|
|
int header_read; /* have we started decoding at all yet? */
|
|
ushort block_type; /* type of this block */
|
|
uint block_length; /* uncompressed length of this block */
|
|
uint block_remaining; /* uncompressed bytes still left to decode */
|
|
uint frames_read; /* the number of CFDATA blocks processed */
|
|
int intel_filesize; /* magic header value used for transform */
|
|
int intel_curpos; /* current offset in transform space */
|
|
int intel_started; /* have we seen any translatable data yet? */
|
|
|
|
ushort[] PRETREE_table;
|
|
byte[] PRETREE_len;
|
|
|
|
ushort[] MAINTREE_table;
|
|
byte[] MAINTREE_len;
|
|
|
|
ushort[] LENGTH_table;
|
|
byte[] LENGTH_len;
|
|
|
|
ushort[] ALIGNED_table;
|
|
byte[] ALIGNED_len;
|
|
|
|
public LZXstate(int window)
|
|
{
|
|
PRETREE_table = new ushort[(1 << LZX_PRETREE_TABLEBITS) + (LZX_PRETREE_MAXSYMBOLS << 1)];
|
|
PRETREE_len = new byte[LZX_PRETREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
|
|
|
MAINTREE_table = new ushort[(1 << LZX_MAINTREE_TABLEBITS) + (LZX_MAINTREE_MAXSYMBOLS << 1)];
|
|
MAINTREE_len = new byte[LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
|
|
|
LENGTH_table = new ushort[(1 << LZX_LENGTH_TABLEBITS) + (LZX_LENGTH_MAXSYMBOLS << 1)];
|
|
LENGTH_len = new byte[LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
|
|
|
ALIGNED_table = new ushort[(1 << LZX_ALIGNED_TABLEBITS) + (LZX_ALIGNED_MAXSYMBOLS << 1)];
|
|
ALIGNED_len = new byte[LZX_ALIGNED_MAXSYMBOLS + LZX_LENTABLE_SAFETY];
|
|
|
|
// LZXinit
|
|
{
|
|
uint wndsize = (uint)(1 << (int)window);
|
|
int i, posn_slots;
|
|
|
|
// LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb)
|
|
// if a previously allocated window is big enough, keep it
|
|
if (window < 15 || window > 21) throw (new Exception("Invalid window size"));
|
|
|
|
/* allocate state and associated window */
|
|
this.window = new byte[wndsize];
|
|
this.actual_size = wndsize;
|
|
this.window_size = wndsize;
|
|
|
|
/* calculate required position slots */
|
|
if (window == 20) posn_slots = 42;
|
|
else if (window == 21) posn_slots = 50;
|
|
else posn_slots = window << 1;
|
|
|
|
/** alternatively **/
|
|
/* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
|
|
|
|
/* initialize other state */
|
|
this.R0 = this.R1 = this.R2 = 1;
|
|
this.main_elements = (ushort)(LZX_NUM_CHARS + (posn_slots << 3));
|
|
this.header_read = 0;
|
|
this.frames_read = 0;
|
|
this.block_remaining = 0;
|
|
this.block_type = LZX_BLOCKTYPE_INVALID;
|
|
this.intel_curpos = 0;
|
|
this.intel_started = 0;
|
|
this.window_posn = 0;
|
|
|
|
/* initialise tables to 0 (because deltas will be applied to them) */
|
|
for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) this.MAINTREE_len[i] = 0;
|
|
for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) this.LENGTH_len[i] = 0;
|
|
|
|
////
|
|
this.block_length = 0;
|
|
this.intel_filesize = 0;
|
|
}
|
|
}
|
|
|
|
public void Reset()
|
|
{
|
|
this.R0 = this.R1 = this.R2 = 1;
|
|
this.header_read = 0;
|
|
this.frames_read = 0;
|
|
this.block_remaining = 0;
|
|
this.block_type = LZX_BLOCKTYPE_INVALID;
|
|
this.intel_curpos = 0;
|
|
this.intel_started = 0;
|
|
this.window_posn = 0;
|
|
|
|
for (int i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) this.MAINTREE_len[i] = 0;
|
|
for (int i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) this.LENGTH_len[i] = 0;
|
|
}
|
|
};
|
|
|
|
/* LZX decruncher */
|
|
|
|
/* Microsoft's LZX document and their implementation of the
|
|
* com.ms.util.cab Java package do not concur.
|
|
*
|
|
* In the LZX document, there is a table showing the correlation between
|
|
* window size and the number of position slots. It states that the 1MB
|
|
* window = 40 slots and the 2MB window = 42 slots. In the implementation,
|
|
* 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
|
|
* first slot whose position base is equal to or more than the required
|
|
* window size'. This would explain why other tables in the document refer
|
|
* to 50 slots rather than 42.
|
|
*
|
|
* The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
|
|
* is not defined in the specification.
|
|
*
|
|
* The LZX document does not state the uncompressed block has an
|
|
* uncompressed length field. Where does this length field come from, so
|
|
* we can know how large the block is? The implementation has it as the 24
|
|
* bits following after the 3 blocktype bits, before the alignment
|
|
* padding.
|
|
*
|
|
* The LZX document states that aligned offset blocks have their aligned
|
|
* offset huffman tree AFTER the main and length trees. The implementation
|
|
* suggests that the aligned offset tree is BEFORE the main and length
|
|
* trees.
|
|
*
|
|
* The LZX document decoding algorithm states that, in an aligned offset
|
|
* block, if an extra_bits value is 1, 2 or 3, then that number of bits
|
|
* should be read and the result added to the match offset. This is
|
|
* correct for 1 and 2, but not 3, where just a huffman symbol (using the
|
|
* aligned tree) should be read.
|
|
*
|
|
* Regarding the E8 preprocessing, the LZX document states 'No translation
|
|
* may be performed on the last 6 bytes of the input block'. This is
|
|
* correct. However, the pseudocode provided checks for the *E8 leader*
|
|
* up to the last 6 bytes. If the leader appears between -10 and -7 bytes
|
|
* from the end, this would cause the next four bytes to be modified, at
|
|
* least one of which would be in the last 6 bytes, which is not allowed
|
|
* according to the spec.
|
|
*
|
|
* The specification states that the huffman trees must always contain at
|
|
* least one element. However, many CAB files contain blocks where the
|
|
* length tree is completely empty (because there are no matches), and
|
|
* this is expected to succeed.
|
|
*/
|
|
|
|
|
|
/* LZX uses what it calls 'position slots' to represent match offsets.
|
|
* What this means is that a small 'position slot' number and a small
|
|
* offset from that slot are encoded instead of one large offset for
|
|
* every match.
|
|
* - position_base is an index to the position slot bases
|
|
* - extra_bits states how many bits of offset-from-base data is needed.
|
|
*/
|
|
static byte[] extra_bits = new byte[] {
|
|
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
|
|
7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
|
|
15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
|
|
17, 17, 17
|
|
};
|
|
|
|
static uint[] position_base = new uint[] {
|
|
0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
|
|
256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
|
|
65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
|
|
1835008, 1966080, 2097152
|
|
};
|
|
}
|
|
}
|
|
}
|