Mercurial > touhou

new file mode 100644
--- /dev/null
+++ b/doc/PBG3
@@ -0,0 +1,57 @@
+The PBG3 format is an archive format used by Touhou 6 (The Embodiment of Scarlet Devil).
+
+It is a bitstream composed of a header, a file table, and LZSS-compressed files.
+
+
+
+Reading integers
+----------------
+
+Integers in PBG3 files are always unsigned, are not byte-aligned, and have a variable size.
+Their size is given by a two-bit prefix whose value, plus one, is the number of bytes used: 00 means the number is stored in one byte, 01 in two bytes, 10 in three bytes and 11 in four bytes.
+
+Ex:
+    0x0012 is stored as: 0000010010
+    0x0112 is stored as: 010000000100010010
+
+
+
+Reading strings
+---------------
+
+Strings are stored as standard NULL-terminated sequences of bytes.
+The only catch is they are not byte-aligned.
+
+
+
+Header
+------
+
+The header is composed of three fields:
+* magic (4 bytes, not NULL-terminated): "PBG3"
+* number of entries (integer)
+* offset of the file table (integer)
+
+The header is thus between 52 and 100 bits long (32 bits of magic, plus two integers of 10 to 34 bits each).
+
+
+
+File table
+----------
+
+The file table starts at a byte boundary but, like the rest of the file, its contents are not byte-aligned.
+It consists of a sequence of entries.
+Each entry is composed of six fields:
+* unknown1 (int) #TODO
+* unknown2 (int) #TODO
+* checksum (int): simple checksum of compressed data
+* offset (int): offset of the compressed data within the archive
+* size (int): size of uncompressed data
+* name (string): name of the file
+
+The checksum is simply the sum of the bytes of the compressed data, truncated to 32 bits.
+Files are compressed using the LZSS algorithm, with a dictionary size of 8192 bytes and a minimum matching length of 3 bytes.
+The size of the offset component of (offset, length) tuples is 13 bits, whereas the size of the length component is 4 bits.
+A file ends with a (0, 0) tuple, that is, 18 zero bits (one flag bit, 13 offset bits and 4 length bits).
+
+Uncompressing an LZSS-compressed file is quite easy, see lzss.py.
+
new file mode 100644
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/pytouhou/formats/pbg3.py
@@ -0,0 +1,71 @@
+from pytouhou.utils.bitstream import BitStream
+import pytouhou.utils.lzss as lzss
+
+
+class PBG3BitStream(BitStream):
+    """Bit stream providing the variable-size primitives of PBG3 archives."""
+
+    def read_int(self):
+        """Read a variable-size unsigned integer.
+
+        A two-bit prefix holds the size of the integer in bytes, minus one;
+        the value itself follows on that many bytes.
+        """
+        size = self.read(2)
+        return self.read((size + 1) * 8)
+
+
+    def read_string(self, maxsize):
+        """Read a zero-terminated byte string of at most `maxsize` bytes.
+
+        Reading stops after the first zero byte (consumed, but not part of
+        the result) or once `maxsize` bytes have been read, whichever comes
+        first. The result is a byte string; decoding is left to the caller.
+        """
+        string = bytearray()
+        for _ in range(maxsize):
+            byte = self.read(8)
+            if byte == 0:
+                break
+            string.append(byte)
+        # bytes(bytearray) is a real byte string on both Python 2 and
+        # Python 3, whereas ''.join(chr(...)) is text on Python 3 and thus
+        # cannot be decoded by the caller.
+        return bytes(string)
+
+
+
+class PBG3(object):
+    """Reader for PBG3 archives.
+
+    `entries` maps each file name to an
+    (unknown1, unknown2, checksum, offset, size) tuple, and `bitstream` is
+    the bit stream the archived files are extracted from.
+    """
+
+    def __init__(self, entries, bitstream=None):
+        self.entries = entries
+        self.bitstream = bitstream #TODO
+
+
+    @classmethod
+    def read(cls, file):
+        """Parse the header and the file table of an archive.
+
+        `file` is a seekable binary file object; the 4-byte magic is read
+        from its current position. Returns a new instance of `cls` bound to
+        a bit stream over `file`.
+
+        Raises Exception if the magic is not b'PBG3'.
+        """
+        magic = file.read(4)
+        if magic != b'PBG3':
+            raise Exception('Not a PBG3 archive (magic: %r)' % (magic,)) #TODO
+
+        bitstream = PBG3BitStream(file)
+        entries = {}
+
+        nb_entries = bitstream.read_int()
+        offset = bitstream.read_int()
+        # Jump to the file table, which lives at `offset` in the archive
+        bitstream.seek(offset)
+        for _ in range(nb_entries):
+            unknown1 = bitstream.read_int()
+            unknown2 = bitstream.read_int()
+            checksum = bitstream.read_int() # Checksum of *compressed data*
+            offset = bitstream.read_int()
+            size = bitstream.read_int()
+            name = bitstream.read_string(255).decode('ascii')
+            entries[name] = (unknown1, unknown2, checksum, offset, size)
+
+        # Use `cls` so that subclasses get instances of their own type
+        return cls(entries, bitstream)
+
+
+    def list_files(self):
+        """Return the names of the files stored in the archive."""
+        return self.entries.keys()
+
+
+    def extract(self, filename, check=False):
+        """Return the uncompressed contents of the file named `filename`.
+
+        If `check` is true, the checksum of the compressed data is verified
+        and a warning is printed when it does not match; the data is
+        returned in either case.
+
+        Raises KeyError if `filename` is not in the archive.
+        """
+        unkwn1, unkwn2, checksum, offset, size = self.entries[filename]
+        self.bitstream.seek(offset)
+        data = lzss.decompress(self.bitstream, size)
+        if check:
+            # Checking the checksum
+            # The underlying file now points right after the last byte
+            # touched by the decompressor.
+            compressed_size = self.bitstream.io.tell() - offset
+            self.bitstream.seek(offset)
+            # bytearray iterates as integers on both Python 2 and Python 3,
+            # unlike raw bytes which would need ord() on Python 2 only.
+            compressed = bytearray(self.bitstream.io.read(compressed_size))
+            value = sum(compressed) & 0xFFFFFFFF
+            if value != checksum:
+                print('Warning: corrupted data') #TODO
+        return data
+
new file mode 100644
--- /dev/null
+++ b/pytouhou/formats/std.py
@@ -0,0 +1,85 @@
+from struct import pack, unpack
+from pytouhou.utils.helpers import read_string
+
+
+
+class Object(object):
+    """Stage object: an opaque 28-byte header and a list of quads."""
+
+    def __init__(self):
+        # No quad until some are appended
+        self.quads = []
+        # Header not understood yet, defaults to 28 zero bytes
+        self.header = b'\x00' * 28 #TODO
+
+
+
+class Stage(object):
+    """Stage description: name, background musics, objects and their uses.
+
+    Attributes:
+        name: name of the stage.
+        bgms: four (name, path) pairs, one per background music slot.
+        objects: list of Object definitions.
+        object_instances: list of (Object, x, y, z) tuples.
+        script: list reserved for the script; read() does not fill it yet.
+    """
+
+    def __init__(self):
+        self.name = ''
+        # read() always loads four (name, path) pairs, so the empty default
+        # has four slots as well.
+        self.bgms = (('', ''), ('', ''), ('', ''), ('', ''))
+        self.objects = []
+        self.object_instances = []
+        self.script = []
+
+
+    @classmethod
+    def read(cls, file):
+        """Parse a stage from the seekable binary file object `file`.
+
+        Returns a new instance of `cls`.
+
+        Raises Exception when a field does not hold its expected value, and
+        struct.error when a fixed-size record cannot be read entirely.
+        """
+        # Use `cls` so that subclasses get instances of their own type
+        stage = cls()
+
+        nb_objects, nb_faces = unpack('<HH', file.read(4)) #TODO: nb_faces is unused
+        object_instances_offset, script_offset = unpack('<II', file.read(8))
+        if file.read(4) != b'\x00\x00\x00\x00':
+            raise Exception('Expected four zero bytes after the offsets') #TODO
+
+        stage.name = read_string(file, 128, 'shift-jis')
+
+        # Four 128-byte music names, followed by four 128-byte music paths
+        bgm_names = [read_string(file, 128, 'shift-jis') for _ in range(4)]
+        bgm_paths = [read_string(file, 128, 'ascii') for _ in range(4)]
+        stage.bgms = list(zip(bgm_names, bgm_paths)) #TODO: handle ' '
+
+        # Read object definitions
+        offsets = unpack('<%dI' % nb_objects, file.read(4 * nb_objects))
+        for offset in offsets:
+            # NOTE(review): `offset` is not used, objects are read back to
+            # back right after the offset table -- confirm this is intended.
+            obj = Object()
+            obj.header = file.read(28) #TODO: this has to be reversed!
+            while True:
+                unknown, size = unpack('<HH', file.read(4))
+                if unknown == 0xffff:
+                    # End of the quad list of this object
+                    break
+                if size != 0x1c:
+                    raise Exception('Unexpected quad size: %#x' % size) #TODO
+                script_index, _padding, x, y, z, width, height = unpack('<HHfffff', file.read(24))
+                #TODO: store script_index, x, y, z, width and height
+                obj.quads.append((script_index, x, y, z, width, height))
+            stage.objects.append(obj)
+
+
+        # Read object usages
+        file.seek(object_instances_offset)
+        while True:
+            obj_id, unknown, x, y, z = unpack('<HHfff', file.read(16))
+            if (obj_id, unknown) == (0xffff, 0xffff):
+                break
+            if unknown != 256:
+                raise Exception('Unexpected object instance field: %d' % unknown) #TODO
+            stage.object_instances.append((stage.objects[obj_id], x, y, z))
+
+
+        # Read other funny things (script)
+        file.seek(script_offset)
+        while True:
+            frame, message_type, size = unpack('<IHH', file.read(8))
+            if (frame, message_type, size) == (0xffffffff, 0xffff, 0xffff):
+                break
+            if size != 0x0c:
+                raise Exception('Unexpected script message size: %#x' % size) #TODO
+            # The payload is read only to skip over it for now
+            data = file.read(12)
+            #TODO: do something useful with this
+
+        return stage
+
new file mode 100644
new file mode 100644
--- /dev/null
+++ b/pytouhou/utils/bitstream.py
@@ -0,0 +1,57 @@
+class BitStream(object):
+    """Read or write a binary file object bit by bit, most significant first.
+
+    `io` is the underlying binary file object. Reading and writing share the
+    same `byte`/`bits` state, so an instance should be used for one
+    direction at a time:
+    * when reading, `bits` is the number of bits of `byte` left to return;
+    * when writing, `bits` is the number of bits already stored in `byte`.
+    """
+
+    def __init__(self, io):
+        self.io = io
+        self.bits = 0
+        self.byte = 0
+
+
+    def seek(self, offset, whence=0):
+        """Seek the underlying file and drop any partially processed byte."""
+        self.io.seek(offset, whence)
+        self.byte = 0
+        self.bits = 0
+
+
+    def tell(self):
+        """Return the position of the underlying file, in bytes."""
+        return self.io.tell()
+
+
+    def tell2(self):
+        """Return a (position of the underlying file, `bits`) tuple."""
+        return self.io.tell(), self.bits
+
+
+    def read_bit(self):
+        """Return the next bit of the stream, as 0 or 1.
+
+        Raises EOFError if the underlying file has no byte left.
+        """
+        if not self.bits:
+            data = self.io.read(1)
+            if not data:
+                # ord() on an empty read would fail with an obscure TypeError
+                raise EOFError('End of stream reached while reading a bit')
+            self.byte = ord(data)
+            self.bits = 8
+        self.bits -= 1
+        return (self.byte >> self.bits) & 0x01
+
+
+    def read(self, nb_bits):
+        """Return the next `nb_bits` bits as an unsigned integer."""
+        value = 0
+        for i in range(nb_bits - 1, -1, -1):
+            value |= self.read_bit() << i
+        return value
+
+
+    def write_bit(self, bit):
+        """Append `bit` (0 or 1) to the stream.
+
+        A byte is only written to the underlying file once a ninth bit
+        arrives or flush() is called.
+        """
+        if self.bits == 8:
+            # bytes(bytearray) is a byte string on both Python 2 and 3,
+            # whereas chr() is text on Python 3.
+            self.io.write(bytes(bytearray((self.byte,))))
+            self.bits = 0
+            self.byte = 0
+        self.byte &= ~(1 << (7 - self.bits))
+        self.byte |= bit << (7 - self.bits)
+        self.bits += 1
+
+
+    def write(self, bits, nb_bits):
+        """Append the `nb_bits` lowest bits of `bits`, highest bit first."""
+        for i in range(nb_bits):
+            self.write_bit(bits >> (nb_bits - 1 - i) & 0x01)
+
+
+    def flush(self):
+        """Write the pending byte, zero-padded, and flush the file.
+
+        Nothing is written when no bit is pending, so that calling flush()
+        twice does not append a spurious zero byte.
+        """
+        if self.bits:
+            self.io.write(bytes(bytearray((self.byte,))))
+        self.bits = 0
+        self.byte = 0
+        self.io.flush()
+
new file mode 100644
--- /dev/null
+++ b/pytouhou/utils/helpers.py
@@ -0,0 +1,12 @@
+def read_string(file, size, encoding=None):
+    """Read a fixed-size, zero-padded string field from `file`.
+
+    Exactly `size` bytes are requested from `file`; whatever follows the
+    first zero byte, if any, is discarded. The result is decoded with
+    `encoding` when one is given, and returned as raw bytes otherwise.
+    """
+    # partition() leaves the data untouched when there is no zero byte
+    raw = file.read(size).partition(b'\x00')[0]
+    return raw.decode(encoding) if encoding else raw
new file mode 100644
--- /dev/null
+++ b/pytouhou/utils/lzss.py
@@ -0,0 +1,25 @@
+def decompress(bitstream, size, dictionary_size=0x2000,
+               offset_size=13, length_size=4, minimum_match_length=3):
+    """Decompress LZSS data read from `bitstream`.
+
+    `bitstream` must provide read_bit() and read(nb_bits). Chunks are read
+    until at least `size` bytes have been produced or the end marker, an
+    (offset, length) tuple whose two stored fields are zero, is found.
+
+    `dictionary_size` is the size of the sliding dictionary, `offset_size`
+    and `length_size` are the widths in bits of the two fields of a tuple,
+    and `minimum_match_length` is added to the stored length of a match.
+
+    Returns the uncompressed data as a byte string.
+    """
+    out_data = bytearray()
+    dictionary = [0] * dictionary_size
+    dictionary_head = 1
+    while len(out_data) < size:
+        flag = bitstream.read_bit()
+        if flag:
+            # The `flag` bit is set, indicating the upcoming chunk of data is a literal
+            # Add it to the uncompressed file, and store it in the dictionary
+            byte = bitstream.read(8)
+            dictionary[dictionary_head] = byte
+            dictionary_head = (dictionary_head + 1) % dictionary_size
+            out_data.append(byte)
+        else:
+            # The `flag` bit is not set, the upcoming chunk is a (offset, length) tuple
+            offset = bitstream.read(offset_size)
+            length = bitstream.read(length_size)
+            # The end marker has to be detected on the stored fields: once
+            # the minimum match length is added, the length is never zero.
+            if (offset, length) == (0, 0):
+                break
+            length += minimum_match_length
+            for i in range(offset, offset + length):
+                byte = dictionary[i % dictionary_size]
+                out_data.append(byte)
+                dictionary[dictionary_head] = byte
+                dictionary_head = (dictionary_head + 1) % dictionary_size
+    # bytes(bytearray) is a byte string on both Python 2 and Python 3
+    return bytes(out_data)