Mercurial > touhou
changeset 0:6b2c7af2384c
Hello Gensokyo _o/
author | Thibaut Girka <thib@sitedethib.com> |
---|---|
date | Sun, 31 Jul 2011 21:32:12 +0200 |
parents | |
children | 57667251d040 |
files | doc/PBG3 pytouhou/__init__.py pytouhou/formats/__init__.py pytouhou/formats/pbg3.py pytouhou/formats/std.py pytouhou/utils/__init__.py pytouhou/utils/bitstream.py pytouhou/utils/helpers.py pytouhou/utils/lzss.py |
diffstat | 9 files changed, 307 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/doc/PBG3 @@ -0,0 +1,57 @@ +The PBG3 format is an archive format used by Touhou 6 (The Embodiment of Scarlet Devil). + +It is a bitstream composed of a header, a file table, and LZSS-compressed files. + + + +Reading integers +---------------- + +Integers in PBG3 files are never signed, they are not byte-aligned, and have a variable size. +Their size is given by two bits holding the number of bytes minus one: 00 means the number is stored in one byte, 10 means it is stored in three bytes. + +Ex: + 0x0012 is stored as: 0000010010 + 0x0112 is stored as: 010000000100010010 + + + +Reading strings +--------------- + +Strings are stored as standard NULL-terminated sequences of bytes. +The only catch is that they are not byte-aligned. + + + +Header +------ + +The header is composed of three fields: +* magic (string): "PBG3" +* number of entries (integer) +* offset of the file table (integer) + +The size of the header is thus between 52 and 100 bits. + + + +File table +---------- + +The file table starts at a byte boundary but, like the rest of the file, its contents are not byte-aligned. +It consists of a sequence of entries. +Each entry is composed of six fields: +* unknown1 (int) #TODO +* unknown2 (int) #TODO +* checksum (int): simple checksum of compressed data +* offset (int): offset of the compressed data from the start of the archive +* size (int): size of uncompressed data +* name (string): name of the file + +The checksum is a mere sum of the bytes of the compressed data. +Files are compressed using the LZSS algorithm, with a dictionary size of 8192 bytes and a minimum matching length of 3 bytes. +The size of the offset component of (offset, length) tuples is 13 bits, whereas the size of the length component is 4 bits. +A file ends with a (0, 0) tuple, that is, 18 zero bits (the flag bit included). + +Uncompressing an LZSS-compressed file is quite easy, see lzss.py. +
# pytouhou/formats/pbg3.py
"""Reader for PBG3 archives: a bit-packed file table plus LZSS-compressed files."""

from pytouhou.utils.bitstream import BitStream
import pytouhou.utils.lzss as lzss


class PBG3BitStream(BitStream):
    """BitStream extended with the integer and string encodings used by PBG3."""

    def read_int(self):
        """Read one unsigned, variable-size integer.

        A two-bit prefix gives the number of bytes minus one; the value
        itself follows on that many bytes (8 to 32 bits).
        """
        size = self.read(2)
        return self.read((size + 1) * 8)

    def read_string(self, maxsize):
        """Read a NUL-terminated byte string of at most `maxsize` bytes.

        Reading stops at the first zero byte (which is consumed but not
        returned) or after `maxsize` bytes, whichever comes first.

        Returns a byte string (`bytes`; this is `str` on Python 2), so the
        caller can `.decode()` it on either Python version.
        """
        string = bytearray()
        for _ in range(maxsize):
            byte = self.read(8)
            if byte == 0:
                break
            string.append(byte)
        # bytes(bytearray) yields a real byte string on Python 2 and 3 alike,
        # unlike ''.join(chr(...)), which produces text on Python 3.
        return bytes(string)


class PBG3(object):
    """An opened PBG3 archive.

    `entries` maps each file name to a tuple
    `(unknown1, unknown2, checksum, offset, size)`; `bitstream` is the
    stream the compressed data is later read from.
    """

    def __init__(self, entries, bitstream=None):
        self.entries = entries
        self.bitstream = bitstream #TODO

    @classmethod
    def read(cls, file):
        """Parse the header and file table of the archive in `file`.

        `file` must be a seekable binary file object positioned at the
        start of the archive.

        Raises:
            Exception: if the file does not start with the b'PBG3' magic.
        """
        magic = file.read(4)
        if magic != b'PBG3':
            raise Exception('Not a PBG3 archive: bad magic %r' % (magic,)) #TODO

        bitstream = PBG3BitStream(file)
        entries = {}

        nb_entries = bitstream.read_int()
        table_offset = bitstream.read_int()
        bitstream.seek(table_offset)
        for _ in range(nb_entries):
            unknown1 = bitstream.read_int()
            unknown2 = bitstream.read_int()
            checksum = bitstream.read_int()  # Checksum of *compressed data*
            offset = bitstream.read_int()
            size = bitstream.read_int()
            name = bitstream.read_string(255).decode('ascii')
            entries[name] = (unknown1, unknown2, checksum, offset, size)

        # Use `cls` so that subclasses get instances of their own type.
        return cls(entries, bitstream)

    def list_files(self):
        """Return the names of the files stored in the archive."""
        return self.entries.keys()

    def extract(self, filename, check=False):
        """Return the uncompressed contents of `filename`.

        When `check` is true, the compressed bytes are summed (modulo 2**32)
        and compared with the stored checksum; a mismatch only prints a
        warning, the data is returned regardless.

        Raises:
            KeyError: if `filename` is not in the archive.
        """
        _unknown1, _unknown2, checksum, offset, size = self.entries[filename]
        self.bitstream.seek(offset)
        data = lzss.decompress(self.bitstream, size)
        if check:
            # The underlying file position after decompression marks the end
            # of the compressed data (rounded up to a whole byte).
            compressed_size = self.bitstream.io.tell() - offset
            self.bitstream.seek(offset)
            compressed = self.bitstream.io.read(compressed_size)
            # bytearray iterates as integers on both Python 2 and 3.
            value = sum(bytearray(compressed)) & 0xFFFFFFFF
            if value != checksum:
                print('Warning: corrupted data') #TODO
        return data
# pytouhou/formats/std.py
"""Reader for the stage description format parsed by `Stage.read`."""

from struct import pack, unpack
from pytouhou.utils.helpers import read_string


class Object(object):
    """One object definition: an opaque 28-byte header and a list of quads."""

    def __init__(self):
        self.header = (b'\x00') * 28 #TODO
        self.quads = []


class Stage(object):
    """In-memory representation of a stage file.

    Attributes set by `read`:
        name: stage name.
        bgms: list of four `(name, path)` pairs.
        objects: list of `Object` definitions.
        object_instances: list of `(Object, x, y, z)` tuples.
        script: currently left empty (script entries are parsed but dropped).
    """

    def __init__(self):
        self.name = ''
        # NOTE(review): three pairs here, but read() stores four — confirm
        # which is intended.
        self.bgms = (('', ''), ('', ''), ('', ''))
        self.objects = []
        self.object_instances = []
        self.script = []

    @classmethod
    def read(cls, file):
        """Parse a stage from the seekable binary file object `file`.

        Raises:
            Exception: if a field that is expected to hold a fixed value
                (reserved header word, entry sizes, instance marker) does not.
            struct.error: if the file ends in the middle of a record.
        """
        stage = cls()

        nb_objects, nb_faces = unpack('<HH', file.read(4))
        object_instances_offset, script_offset = unpack('<II', file.read(8))
        reserved = file.read(4)
        if reserved != b'\x00\x00\x00\x00':
            raise Exception('Unexpected non-zero header field: %r' % (reserved,)) #TODO

        stage.name = read_string(file, 128, 'shift-jis')

        # Four track names come first, then the four matching paths.
        bgm_names = [read_string(file, 128, 'shift-jis') for _ in range(4)]
        bgm_paths = [read_string(file, 128, 'ascii') for _ in range(4)]
        stage.bgms = list(zip(bgm_names, bgm_paths)) #TODO: handle ' '

        # Read object definitions
        offsets = unpack('<%s' % ('I' * nb_objects), file.read(4 * nb_objects))
        # NOTE(review): the offsets are read but never seeked to; this relies
        # on the definitions being stored back to back in table order —
        # confirm against real files.
        for offset in offsets:
            obj = Object()
            obj.header = file.read(28) #TODO: this has to be reversed!

            while True:
                unknown, size = unpack('<HH', file.read(4))
                if unknown == 0xffff:
                    # End-of-object marker.
                    break
                if size != 0x1c:
                    # 0x1c == 28: the 4 bytes just read plus the 24 below.
                    raise Exception('Unexpected quad size: %#x' % size) #TODO
                script_index, _padding, x, y, z, width, height = unpack('<HHfffff', file.read(24))
                #TODO: store script_index, x, y, z, width and height
                obj.quads.append((script_index, x, y, z, width, height))
            stage.objects.append(obj)

        # Read object usages
        file.seek(object_instances_offset)
        while True:
            obj_id, unknown, x, y, z = unpack('<HHfff', file.read(16))
            if (obj_id, unknown) == (0xffff, 0xffff):
                break
            if unknown != 256:
                raise Exception('Unexpected object instance field: %d' % unknown) #TODO
            stage.object_instances.append((stage.objects[obj_id], x, y, z))

        # Read other funny things (script)
        file.seek(script_offset)
        while True:
            frame, message_type, size = unpack('<IHH', file.read(8))
            if (frame, message_type, size) == (0xffffffff, 0xffff, 0xffff):
                break
            if size != 0x0c:
                raise Exception('Unexpected script entry size: %#x' % size) #TODO
            data = file.read(12)
            #TODO: do something useful with this

        return stage
# pytouhou/utils/bitstream.py
class BitStream(object):
    """Bit-level reader/writer over a binary file-like object.

    Bits are consumed and produced most-significant bit first. The same
    `bits`/`byte` state is used for reading (bits left in the current byte)
    and for writing (bits already placed in the pending byte), so a single
    instance should be used for one direction at a time.
    """

    def __init__(self, io):
        self.io = io
        self.bits = 0
        self.byte = 0

    def seek(self, offset, whence=0):
        """Seek the underlying stream and drop any partially read byte."""
        self.io.seek(offset, whence)
        self.byte = 0
        self.bits = 0

    def tell(self):
        """Return the byte position of the underlying stream."""
        return self.io.tell()

    def tell2(self):
        """Return `(byte position, bit counter)` for bit-accurate positions."""
        return self.io.tell(), self.bits

    def read_bit(self):
        """Read and return the next bit (0 or 1).

        Reading past the end of the stream raises TypeError, because `ord`
        is handed the empty result of `io.read(1)`.
        """
        if not self.bits:
            self.byte = ord(self.io.read(1))
            self.bits = 8
        self.bits -= 1
        return (self.byte >> self.bits) & 0x01

    def read(self, nb_bits):
        """Read `nb_bits` bits and return them as an unsigned integer."""
        value = 0
        for shift in range(nb_bits - 1, -1, -1):
            value |= self.read_bit() << shift
        return value

    def _write_pending_byte(self):
        """Write the pending byte to the stream and reset the write state."""
        # bytes(bytearray(...)) is a byte string on Python 2 and 3 alike;
        # chr() would produce text on Python 3 and fail on a binary stream.
        self.io.write(bytes(bytearray((self.byte,))))
        self.bits = 0
        self.byte = 0

    def write_bit(self, bit):
        """Append one bit; a full byte is written out lazily on the next call."""
        if self.bits == 8:
            self._write_pending_byte()
        position = 7 - self.bits
        self.byte &= ~(1 << position)
        self.byte |= bit << position
        self.bits += 1

    def write(self, bits, nb_bits):
        """Append the `nb_bits` low bits of `bits`, most significant first."""
        for i in range(nb_bits):
            self.write_bit(bits >> (nb_bits - 1 - i) & 0x01)

    def flush(self):
        """Write out any pending bits, zero-padded to a byte, then flush.

        Nothing is written when no bits are pending, so calling `flush`
        twice (or on an untouched stream) does not add a stray zero byte.
        """
        if self.bits:
            self._write_pending_byte()
        self.bits = 0
        self.byte = 0
        self.io.flush()
# pytouhou/utils/helpers.py
def read_string(file, size, encoding=None):
    """Read a fixed-size, NUL-padded string field from `file`.

    Exactly `size` bytes are requested from `file`; everything from the
    first zero byte onwards is discarded. When `encoding` is given (and
    non-empty) the remaining bytes are decoded with it, otherwise the raw
    bytes are returned.
    """
    raw = file.read(size)

    terminator = raw.find(b'\x00')
    if terminator != -1:
        raw = raw[:terminator]

    return raw.decode(encoding) if encoding else raw
# pytouhou/utils/lzss.py
def decompress(bitstream, size, dictionary_size=0x2000,
               offset_size=13, length_size=4, minimum_match_length=3):
    """Decompress LZSS data read bit by bit from `bitstream`.

    Args:
        bitstream: object providing `read_bit()` and `read(nb_bits)`.
        size: expected size of the uncompressed data; decoding stops once
            at least that many bytes have been produced.
        dictionary_size: size of the circular dictionary.
        offset_size: width in bits of the offset part of a match.
        length_size: width in bits of the length part of a match.
        minimum_match_length: value added to the stored length of a match.

    Returns:
        The uncompressed data as a byte string. A match that straddles
        `size` is copied in full, so the result can be slightly longer
        than `size`.
    """
    out_data = bytearray()
    dictionary = [0] * dictionary_size
    dictionary_head = 1
    while len(out_data) < size:
        if bitstream.read_bit():
            # Flag set: the next 8 bits are a literal byte, which goes both
            # to the output and into the dictionary.
            byte = bitstream.read(8)
            dictionary[dictionary_head] = byte
            dictionary_head = (dictionary_head + 1) % dictionary_size
            out_data.append(byte)
        else:
            # Flag clear: the next bits are an (offset, length) tuple.
            offset = bitstream.read(offset_size)
            stored_length = bitstream.read(length_size)
            # An all-zero tuple marks the end of the data. The test has to
            # use the stored length: once minimum_match_length is added the
            # length can never be 0, so the terminator would go unnoticed.
            if offset == 0 and stored_length == 0:
                break
            length = stored_length + minimum_match_length
            for i in range(offset, offset + length):
                byte = dictionary[i % dictionary_size]
                out_data.append(byte)
                dictionary[dictionary_head] = byte
                dictionary_head = (dictionary_head + 1) % dictionary_size
    # bytes(bytearray) is a byte string on Python 2 and 3 alike.
    return bytes(out_data)