initial commit

This commit is contained in:
2014-01-18 15:06:11 +01:00
parent 3fabb8dcf8
commit 768bec39b3
408 changed files with 171325 additions and 2 deletions

60
tools/lz77/Makefile Normal file
View File

@@ -0,0 +1,60 @@
# Generic build file: compiles every .c/.cpp found in $(SOURCEDIR) and links
# the objects into an executable named after the current directory.

# programs
CC = gcc
CXX = g++
STRIP = strip
RM = rm
# flags
CFLAGS = -g -Wall
CXXFLAGS = $(CFLAGS)
LIBS =
# directories
SOURCEDIR = src
HEADERDIR =
LIBDIR =
# generated variables
CFILES = $(foreach dir,$(SOURCEDIR),$(wildcard $(dir)/*.c))
CPPFILES = $(foreach dir,$(SOURCEDIR),$(wildcard $(dir)/*.cpp))
HEADERS = $(CFILES:.c=.h) $(CPPFILES:.cpp=.h)
OBJS = $(CFILES:.c=.o) $(CPPFILES:.cpp=.o)
LIBS += $(foreach dir,$(LIBDIR),-L $(dir))
INCLUDE = $(foreach dir,$(HEADERDIR),-I $(dir))
PROJECT = $(notdir $(CURDIR))
# selecting linker: use the C++ driver as soon as one C++ source exists
ifeq ($(strip $(CPPFILES)), )
export LD = $(CC)
else
export LD = $(CXX)
endif

# none of these targets name a real file
.PHONY: all clean mrproper release depend

# default goal; also required by 'release'
all: $(PROJECT)

# default compilation
# (pattern rules: a suffix rule such as '.c.o' must not list prerequisites)
%.o: %.c
	@echo "[CC] $<"
	@$(CC) $(INCLUDE) $(CFLAGS) -o $@ -c $<

%.o: %.cpp
	@echo "[CXX] $<"
	@$(CXX) $(INCLUDE) $(CXXFLAGS) -o $@ -c $<

# standard rules
$(PROJECT): $(OBJS)
	@echo "[LINK] $(PROJECT)"
	@$(LD) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)

clean:
	@echo Removing objects...
	@$(RM) -f $(OBJS)

mrproper: clean
	@echo Removing $(PROJECT)
	@$(RM) -f $(PROJECT)

# Build first, then drop the objects and strip the binary. The object removal
# is done in this recipe (not via a 'clean' prerequisite) so that it cannot run
# concurrently with the build under 'make -j'.
release: all
	@echo Removing objects...
	@$(RM) -f $(OBJS)
	@echo "[STRIP] $(PROJECT)"
	@$(STRIP) $(PROJECT)

depend:
	makedepend -- $(CFLAGS) -- $(CFILES) $(CPPFILES)

BIN
tools/lz77/lz77 Normal file

Binary file not shown.

510
tools/lz77/src/lz77.c Normal file
View File

@@ -0,0 +1,510 @@
// MsK` : added a main file to make this standalone
// $ gcc -o lz77 lz77.c
//! \file grit_lz.cpp
//! LZSS compression, VRAM safe
//! \date 20050814 - 20050903
//! \author cearn
//
// === NOTES ===
/*
AGBCOMP compatible LZSS compressor
Compresses files in a format that the GBA BIOS can decode.
Original code by Haruhiko Okumura, 4/6/1989.
12-2-404 Green Heights, 580 Nagasawa, Yokosuka 239, Japan.
Anonymous, December 2001:
Converted I/O to load the entire file into memory and operate
on buffers.
Modified to output in the GBA BIOS's native format.
Damian Yerrick, July 2002:
Translated the program into C from C++.
Removed use of non-ANSI <sys/stat.h> that some compilers don't
support.
Made messages more informational.
Corrected an off-by-one error in the allocation of the output
size.
Removed comp.h. Now it's one file.
Damian Yerrick, December 2002:
Prevented matches that reference data before start of file.
Began to standardize comments.
J. Vijn, Feb 2005
* Removed main() and added lzgb_compress() so that the file acts
more like a plugin.
// declaration:
unsigned char *lzgb_compress(const char *src, int src_len,
int *pdst_len);
* Removed InsertNode for ii=[N-2*F, N-F-1] because they create
spurious nodes that allow strings to start _before_ the actual
data. Using uninitialized data would fit nicely into the Bad
Things category. Also removed the safeguards to counter this,
since they're not necessary anymore.
* Made the code VRAM safe (I hope)
On this issue: remember that VRAM cannot be written to in bytes.
Therefore, LZ77UnCompVram prepares an u16 before writing. The
problem occurs when the matched position is one before the current
position, in which case the destination bytes haven't been copied
in yet. There's that using uninitialized data again.
First a little more about the process. Nodes are built for each
byte. rson[N+1..N+256] are the trees for each byte and each node in
that tree represents a possible match. There will be as many nodes
in the tree as there are of those bytes in the ring buffer.
Naturally the last node added is the one before the current one: r-1.
The routine will traverse the tree of the byte at [r], looking
for two things: a) the longest match and b) the closest match if there
are matches of equal length. And this is where shit happens. Both of
them can go wrong, but always for a very special case. As said, the
problem occurs when the match is the previous position. This can only
happen under one condition: for _stretches_ of the same bytes. The
solution is to use the latest length UNLESS the current node p is the
last one added (r-1). This will work out for both new partial stretches
(which will only have one pass and as a result the 2nd byte will have
match_length=0) and F-long stretches. It's basically a 1 line fix.
Gawd I hate those. (20050312: ok, so it turned out to be a 2 line fix)
Use, distribute, and modify this code freely.
"I do not accept responsibility for any effects, adverse or otherwise,
that this code may have on you, your computer, your sanity, your dog,
and anything else that you can think of. Use it at your own risk."
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/// === TYPES =========================================================
// Raw byte type; u8 and BYTE are two names for the same type.
typedef unsigned char u8, BYTE;
// Simple data container used by cprs_gba_lz77().
// On input, cprs_gba_lz77() treats `data` as width*height bytes.
// On output, it stores the compressed stream in `data` and sets
// width to 1 and height to the compressed size in bytes.
typedef struct RECORD
{
int width;
int height;
BYTE *data;
} RECORD;
// Rounds nn up to the next multiple of 4.
#define ALIGN4(nn) ( ((nn)+3)&~3 )
// First byte of the output header written by CompressLZ77().
#define CPRS_LZ77_TAG 0x10
// --------------------------------------------------------------------
// CONSTANTS
// --------------------------------------------------------------------
// Define information for compression
// (don't modify from 4096/18/2 if AGBCOMP format is required)
#define N 4096 // size of ring buffer (12 bit); must be a power of two because NMASK is used for wrapping
#define F 18 // upper limit for match_length (size of the look-ahead window)
#define THRESHOLD 2 // encode string into position and length
// if matched length is greater than this
#define NIL N // sentinel index meaning "no node / not used" in lson, rson and dad
#define TEXT_BUF_CLEAR 0 // byte to initialize the area before text_buf with
#define NMASK (N-1) // for wrapping ring-buffer indices: (x & NMASK)
// --------------------------------------------------------------------
// GLOBALS
// --------------------------------------------------------------------
/* Compressor global variables. If you actually want to USE this
code in a non-trivial app, put these global variables in a struct,
as the Allegro library did.
Because all state lives in these globals, the compressor is not
reentrant.
*/
// Running total of code bytes flushed by CompressLZ77(); it is only ever
// added to, never reset.
static unsigned long int codesize = 0; // code size counter
// ring buffer of size N with extra F-1 bytes to facilitate string comparison
static BYTE text_buf[N + F - 1];
// Results of the most recent InsertNode() call.
static int match_position; // global string match position
static int match_length; // global string match length
// Indices 0..N-1 are ring-buffer positions used as tree nodes;
// rson[N+1..N+256] hold the roots of the 256 per-first-byte trees.
static int lson[N+1], rson[N+256+1], dad[N+1]; // left & right children & parents -- These constitute binary search trees.
// Input and output buffers. The callers (cprs_gba_lz77() and main()) set
// InBuf/InSize and allocate OutBuf before calling CompressLZ77().
BYTE *InBuf, *OutBuf;
// InSize: number of input bytes. OutSize: allocation size before the call,
// number of bytes written after it. InOffset: read cursor used by InChar().
int InSize, OutSize, InOffset;
// --------------------------------------------------------------------
// PROTOTYPES
// --------------------------------------------------------------------
//! GBA lz77 interface function
//! Compresses src into dst; returns the compressed size, or 0 on failure.
int cprs_gba_lz77(RECORD *dst, const RECORD *src);
/* Binary search tree functions */
// Resets the tree roots and parent links.
static void InitTree(void);
// Inserts the string at text_buf[r] and records the best match found.
static void InsertNode(int r);
// Removes node p from its tree (no-op if it is not in a tree).
static void DeleteNode(int p);
/* Misc Functions */
// Compresses InBuf into OutBuf and sets OutSize.
static void CompressLZ77(void);
// Returns the next input byte, or -1 at end of input.
static int InChar(void);
// --------------------------------------------------------------------
// FUNCTIONS
// --------------------------------------------------------------------
//! GBA LZ77 interface function.
/*! Compresses the src->width * src->height bytes at src->data and stores
    the result in \a dst.

    Sets up InBuf/InSize, allocates the scratch buffer OutBuf, and lets
    CompressLZ77() do the actual work.

    \param dst  Receives the compressed stream. Its previous \c data pointer
                is released with free() and replaced by a freshly malloc'ed
                buffer owned by the caller; width is set to 1 and height to
                the size. \a dst is left untouched when the call fails.
    \param src  Source bytes; not modified.
    \return Size in bytes of the compressed stream, rounded up to a multiple
            of 4, or 0 on failure (NULL arguments, negative dimensions,
            input larger than the 24-bit size field of the header can
            express, or out of memory).
*/
int cprs_gba_lz77(RECORD *dst, const RECORD *src)
{
    BYTE *result;
    long long total;

    // fail on the obvious
    if(src == NULL || src->data == NULL || dst == NULL)
        return 0;
    if(src->width < 0 || src->height < 0)
        return 0;

    // Multiply in a wider type so the product cannot overflow an int, and
    // reject anything the 3-byte size field of the header cannot hold.
    total = (long long)src->width * src->height;
    if(total > 0xFFFFFF)
        return 0;

    InSize = (int)total;
    OutSize = InSize + (InSize>>3) + 16;

    // Zero-filled so that the alignment padding copied below is defined.
    OutBuf = (BYTE*)calloc(OutSize, 1);
    if(OutBuf == NULL)
        return 0;

    InBuf = (BYTE*)src->data;
    CompressLZ77();
    OutSize = ALIGN4(OutSize);

    // Build the result before touching dst, so a failed allocation does not
    // leave dst->data pointing at freed memory.
    result = (BYTE*)malloc(OutSize);
    if(result == NULL)
    {
        free(OutBuf);
        OutBuf = NULL;
        return 0;
    }
    memcpy(result, OutBuf, OutSize);
    free(OutBuf);
    OutBuf = NULL;

    free(dst->data);
    dst->data = result;
    dst->width = 1;
    dst->height = OutSize;

    return OutSize;
}
/* InitTree() **************************
  Reset the binary search trees to "empty".

  Nodes 0 .. N-1 double as ring-buffer positions; rson[i] / lson[i] are
  their children and need no initialization here. dad[i] is the parent
  of node i and is set to NIL (= N), meaning 'not in any tree'.

  rson[N + 1 + c] is the root of the tree holding the strings that start
  with byte value c (256 trees in total); every root is set to NIL.
*/
void InitTree(void)
{
    int node;

    // No ring-buffer position is part of a tree yet.
    node = 0;
    while(node < N)
    {
        dad[node] = NIL;
        ++node;
    }

    // All 256 per-byte trees start out empty.
    node = N + 1;
    while(node <= N + 256)
    {
        rson[node] = NIL;
        ++node;
    }
}
/* InsertNode() ************************
Inserts string of length F, text_buf[r..r+F-1], into one of the
trees (text_buf[r]'th tree) and returns the longest-match position
and length via the global variables match_position and match_length.
If match_length = F, then removes the old node in favor of the new
one, because the old one will be deleted sooner.
Note r plays double role, as tree node and position in buffer.

match_length is always reset to 0 on entry; match_position is only
written when a match is accepted. A match located at the previous
ring-buffer position ((r-1)&NMASK) is never accepted (VRAM safety,
see the notes at the top of the file).
*/
void InsertNode(int r)
{
int i, p, cmp, prev_length;
BYTE *key;
// Start at the root slot of the tree for the first byte of the key.
// cmp=1 makes the first step go to the right child, which is the only
// link a root slot uses.
cmp= 1; key= &text_buf[r]; p= N + 1 + key[0];
rson[r]= lson[r]= NIL;
// prev_length is assigned here but not read anywhere else in this function.
prev_length= match_length= 0;
for( ; ; )
{
// Descend according to the last comparison; if the child slot is free,
// hang r there and stop.
if(cmp >= 0)
{
if(rson[p] != NIL)
p= rson[p];
else
{
rson[p]= r;
dad[r]= p;
return;
}
}
else
{
if(lson[p] != NIL)
p= lson[p];
else
{
lson[p]= r;
dad[r]= p;
return;
}
}
// Compare the key with the string at node p. Byte 0 is equal by
// construction (same tree), so start at 1. After the loop, i is the
// number of matching bytes and cmp the sign of the first difference
// (cmp stays 0 when all F bytes match).
for(i=1; i < F; i++)
if((cmp = key[i] - text_buf[p + i]) != 0)
break;
if(i > match_length)
{
// VRAM safety:
// match_length= i ONLY if the matched position
// isn't the previous one (r-1)
// for normal case, remove the if.
// That's _IT_?!? Yup, that's it.
if(p != ((r-1)&NMASK) )
{
match_length= i;
match_position= p;
}
// Full-length match accepted: leave the loop and replace node p by r.
if(match_length >= F)
break;
}
}
// Full length match, remove old node in favor of this one
// r takes over p's parent and both children ...
dad[r]= dad[p];
lson[r]= lson[p];
rson[r]= rson[p];
// ... the children are re-parented (when a child is NIL this writes the
// spare slot dad[NIL], which the arrays are sized for) ...
dad[lson[p]]= r;
dad[rson[p]]= r;
// ... and p's parent is pointed at r on whichever side p was.
if(rson[dad[p]] == p)
rson[dad[p]]= r;
else
lson[dad[p]]= r;
// Mark p as no longer in any tree.
dad[p]= NIL;
}
/* DeleteNode() ************************
  Unlinks node p from the tree it belongs to. Does nothing when p is not
  part of any tree (dad[p] == NIL).

  A node with at most one child is replaced by that child (or by NIL).
  A node with two children is replaced by the rightmost node of its left
  subtree.
*/
void DeleteNode(int p)
{
    int repl;       // node (or NIL) that takes p's place

    if(dad[p] == NIL)
        return;     /* not in tree */

    if(rson[p] == NIL)
    {
        repl = lson[p];
    }
    else if(lson[p] == NIL)
    {
        repl = rson[p];
    }
    else
    {
        repl = lson[p];
        if(rson[repl] != NIL)
        {
            // Walk down to the rightmost node of the left subtree ...
            while(rson[repl] != NIL)
                repl = rson[repl];

            // ... detach it from its parent, which inherits its left child ...
            rson[dad[repl]] = lson[repl];
            dad[lson[repl]] = dad[repl];

            // ... and hand it p's left subtree.
            lson[repl] = lson[p];
            dad[lson[p]] = repl;
        }
        // In both cases the replacement adopts p's right subtree.
        rson[repl] = rson[p];
        dad[rson[p]] = repl;
    }

    // Hook the replacement under p's parent, on the side p occupied.
    dad[repl] = dad[p];
    if(rson[dad[p]] == p)
        rson[dad[p]] = repl;
    else
        lson[dad[p]] = repl;

    dad[p] = NIL;
}
/* CompressLZ77() **********************
  Compress InBuf (InSize bytes) into OutBuf.

  Preconditions (set up by the caller):
    - InBuf / InSize describe the input.
    - OutBuf points to at least InSize + (InSize>>3) + 16 writable bytes.

  On return:
    - OutBuf[0..3] hold the header: CPRS_LZ77_TAG followed by the low
      24 bits of InSize, least significant byte first. The header is
      written even when the input is empty.
    - OutSize is the number of bytes produced (header included).
    - The bytes from OutSize up to ALIGN4(OutSize) are zeroed, so callers
      that round the size up to a multiple of 4 emit defined padding.
*/
void CompressLZ77(void)
{
    int i, c, len, r, s, last_match_length, code_buf_ptr;
    unsigned char code_buf[17];
    unsigned short mask;
    unsigned int curmatch;      // PONDER: doesn't this do what r does?
    unsigned int savematch;

    OutSize = 4;    // the compression type and file size come first
    InOffset = 0;

    // Write the header up front so that it is present on every exit path,
    // including the early return for empty input below.
    OutBuf[0] = CPRS_LZ77_TAG;
    OutBuf[1] = ((InSize>>0)&0xFF);
    OutBuf[2] = ((InSize>>8)&0xFF);
    OutBuf[3] = ((InSize>>16)&0xFF);

    match_position = curmatch = N-F;

    InitTree();     // initialize trees

    /* code_buf[1..16] saves eight units of code, and code_buf[0] works
       as eight flags, "0" representing that the unit is an unencoded
       letter (1 byte), "1" a position-and-length pair (2 bytes).
       Thus, eight units require at most 16 bytes of code. */
    code_buf[0] = 0;
    code_buf_ptr = 1;

    s = 0;
    r = N - F;

    // Clear the buffer
    for(i = s; i < r; i++)
        text_buf[i] = TEXT_BUF_CLEAR;

    // Read F bytes into the last F bytes of the buffer
    for(len = 0; len < F && (c = InChar()) != -1; len++)
        text_buf[r + len] = c;

    if(len == 0)
        return;     // empty input: header only

    // The original LZSS code inserted F strings of 'space' characters
    // here. Those strings have no relation to the actual data and would
    // allow matches that start before the data, so they are not inserted.

    // Create the first node, sets match_length to 0
    InsertNode(r);

    // GBA LZSS masks are big-endian
    mask = 0x80;

    do
    {
        // Near the end of the input a match may not run past the data.
        if(match_length > len)
            match_length = len;

        // match too short: add one unencoded byte
        if(match_length <= THRESHOLD)
        {
            match_length = 1;
            code_buf[code_buf_ptr++] = text_buf[r];
        }
        else    // Long enough: add position and length pair.
        {
            code_buf[0] |= mask;    // set match flag

            // First byte is 4:length and 4:top 4 bits of the distance;
            // second byte is the low 8 bits of the distance.
            savematch = ((curmatch-match_position)&NMASK)-1;
            code_buf[code_buf_ptr++] = ((BYTE)((savematch>>8)&0xf))
                | ((match_length - (THRESHOLD + 1))<<4);
            code_buf[code_buf_ptr++] = (BYTE)savematch;
        }
        curmatch += match_length;
        curmatch &= NMASK;

        // if mask is empty, the buffer's full; write out the code buffer.
        // At end of source, code_buf_ptr will be <17
        if((mask >>= 1) == 0)
        {
            for(i = 0; i < code_buf_ptr; i++)
                OutBuf[OutSize++] = code_buf[i];
            codesize += code_buf_ptr;
            code_buf[0] = 0;
            code_buf_ptr = 1;
            mask = 0x80;
        }

        // Inserts nodes for this match. The last_match_length is
        // required because InsertNode changes match_length.
        last_match_length = match_length;
        for(i = 0; i < last_match_length && (c = InChar()) != -1; i++)
        {
            DeleteNode(s);      // Delete string before look-ahead
            text_buf[s] = c;    // place new bytes

            // text_buf[N..N+F> is a double for text_buf[0..F>
            // for easier string comparison
            if(s < F-1)
                text_buf[s + N] = c;

            // add and wrap around the buffer
            s = (s + 1) & NMASK;
            r = (r + 1) & NMASK;

            // Register the string in text_buf[r..r+F-1]
            InsertNode(r);
        }

        while(i++ < last_match_length)
        {
            // After the end of text
            DeleteNode(s);      // no need to read, but
            s = (s + 1) & NMASK;
            r = (r + 1) & NMASK;
            if(--len)
                InsertNode(r);  // buffer may not be empty
        }
    } while(len > 0);   // until length of string to be processed is zero

    if(code_buf_ptr > 1)
    {
        // Send remaining code.
        for(i = 0; i < code_buf_ptr; i++)
            OutBuf[OutSize++] = code_buf[i];
        codesize += code_buf_ptr;
    }

    // Zero the alignment padding. OutSize itself is left unchanged; this
    // stays within the buffer size required in the preconditions.
    for(i = OutSize; i < ALIGN4(OutSize); i++)
        OutBuf[i] = 0;
}
/* InChar() ****************************
  Fetch the next byte of InBuf and advance InOffset.
  Returns the byte value (0..255), or -1 once InOffset has reached InSize.
*/
int InChar()
{
    if(InOffset >= InSize)
        return -1;      // end of input

    return InBuf[InOffset++];
}
// EOF
//
/* main() ******************************
  Standalone front end: lz77 <original> <compressed>

  Loads <original> into memory, compresses it with CompressLZ77() and
  writes the result, padded to a multiple of 4 bytes, to <compressed>.
  The output file is only created once the input has been read
  successfully.

  Returns 0 on success, -1 on bad usage, -2 on an I/O error, -3 when the
  input is too large for the format or memory runs out.
*/
int main(int argc, char ** argv)
{
    FILE *in = NULL, *out = NULL;
    long fsize = 0;
    int rc = -2;

    if ( argc != 3 ) {
        fprintf(stderr, "Usage : %s <original> <compressed>\n", argv[0]);
        return -1;
    }

    InBuf = NULL;
    OutBuf = NULL;

    in = fopen(argv[1], "rb");
    if ( in == NULL ) {
        fprintf(stderr, "FATAL : Could not open %s\n", argv[1]);
        goto done;
    }

    // Determine the input size; every step can fail (e.g. on a pipe).
    if ( fseek(in, 0, SEEK_END) != 0
        || (fsize = ftell(in)) < 0
        || fseek(in, 0, SEEK_SET) != 0 ) {
        fprintf(stderr, "FATAL : Could not determine the size of %s\n", argv[1]);
        goto done;
    }

    // The header stores the uncompressed size in 3 bytes.
    if ( fsize > 0xFFFFFF ) {
        fprintf(stderr, "FATAL : %s is too large (limit is 16777215 bytes)\n", argv[1]);
        rc = -3;
        goto done;
    }

    InSize = (int)fsize;
    OutSize = InSize + (InSize>>3) + 16;

    // malloc(0) may legitimately return NULL, so never ask for 0 bytes.
    InBuf = malloc(InSize > 0 ? (size_t)InSize : 1);
    // Zero-filled so that the alignment padding written below is defined.
    OutBuf = calloc((size_t)OutSize, 1);
    if ( InBuf == NULL || OutBuf == NULL ) {
        fprintf(stderr, "FATAL : Out of memory\n");
        rc = -3;
        goto done;
    }

    if ( fread(InBuf, 1, (size_t)InSize, in) != (size_t)InSize ) {
        fprintf(stderr, "FATAL : Could not read %s\n", argv[1]);
        goto done;
    }

    // Open the output only now, so a bad input does not truncate it.
    out = fopen(argv[2], "wb");
    if ( out == NULL ) {
        fprintf(stderr, "FATAL : Could not open %s\n", argv[2]);
        goto done;
    }

    CompressLZ77();
    OutSize = ALIGN4(OutSize);

    if ( fwrite(OutBuf, 1, (size_t)OutSize, out) != (size_t)OutSize ) {
        fprintf(stderr, "FATAL : Could not write %s\n", argv[2]);
        goto done;
    }

    rc = 0;

done:
    free(InBuf);
    free(OutBuf);
    InBuf = NULL;
    OutBuf = NULL;
    if ( in != NULL )
        fclose(in);
    // fclose flushes; a failure here means the output is incomplete.
    if ( out != NULL && fclose(out) != 0 ) {
        fprintf(stderr, "FATAL : Could not write %s\n", argv[2]);
        rc = -2;
    }
    return rc;
}