changeset 637:afa012bb8021

Hello Rust!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Wed, 03 Jul 2019 16:27:12 +0200
parents 4fa0a8e7d941
children a806f28e94fc
files .gitignore Cargo.toml src/lib.rs src/th06/mod.rs src/th06/pbg3.rs src/util/bitstream.rs src/util/lzss.rs src/util/mod.rs
diffstat 8 files changed, 464 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
new file mode 100644
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "touhou"
+version = "0.1.0"
+authors = ["Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>"]
+edition = "2018"
+description = "A collection of tools to work with Touhou data"
+homepage = "https://pytouhou.linkmauve.fr"
+license = "GPL-3.0-or-later"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
new file mode 100644
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,6 @@
+#![deny(missing_docs)]
+
+//! Crate implementing various Touhou formats.
+
+pub mod util;
+pub mod th06;
new file mode 100644
--- /dev/null
+++ b/src/th06/mod.rs
@@ -0,0 +1,3 @@
+//! Touhou 6: EoSD implementation.
+
+pub mod pbg3;
new file mode 100644
--- /dev/null
+++ b/src/th06/pbg3.rs
@@ -0,0 +1,190 @@
+//! PBG3 archive files handling.
+//!
+//! This module provides types for handling the PBG3 file format.
+//! The PBG3 format is the archive format used by Touhou 6: EoSD.
+//!
+//! PBG3 files are merely a bitstream composed of a header, a file
+//! table, and LZSS-compressed files.
+
+use crate::util::bitstream::BitStream;
+use crate::util::lzss;
+use std::io;
+use std::collections::hash_map::{self, HashMap};
+
+/// Bit-level reader which understands the integer and string encodings of PBG3.
+pub struct PBG3BitStream<R: io::Read + io::Seek> {
+    /// Underlying bit reader every operation is forwarded to.
+    bitstream: BitStream<R>,
+}
+
+impl<R: io::Read + io::Seek> PBG3BitStream<R> {
+    /// Wrap an existing `BitStream` so that PBG3 integers and strings can be
+    /// decoded from it.
+    pub fn new(bitstream: BitStream<R>) -> PBG3BitStream<R> {
+        Self { bitstream }
+    }
+
+    /// Move to another position of the stream; bits read but not yet consumed
+    /// are thrown away.
+    pub fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        self.bitstream.seek(seek_from)
+    }
+
+    /// Report the current position in the stream.
+    ///
+    /// This is a zero-length relative seek on the wrapped `BitStream`, so it
+    /// also drops any bits left over from a partially consumed byte.
+    pub fn tell(&mut self) -> io::Result<u64> {
+        let here = io::SeekFrom::Current(0);
+        self.bitstream.seek(here)
+    }
+
+    /// Read `nb_bits` bits and return them as an integer.
+    pub fn read(&mut self, nb_bits: usize) -> io::Result<usize> {
+        self.bitstream.read(nb_bits)
+    }
+
+    /// Read `nb_bytes` whole bytes.
+    pub fn read_bytes(&mut self, nb_bytes: usize) -> io::Result<Vec<u8>> {
+        self.bitstream.read_bytes(nb_bytes)
+    }
+
+    /// Decode a variable-size integer.
+    ///
+    /// A two-bit prefix holds the number of bytes of the value, minus one; the
+    /// value itself follows immediately, without any byte alignment.
+    pub fn read_u32(&mut self) -> io::Result<u32> {
+        let extra_bytes = self.read(2)?;
+        let nb_bits = 8 * (extra_bytes + 1);
+        self.read(nb_bits).map(|value| value as u32)
+    }
+
+    /// Decode a string, which is a NUL-terminated run of non-aligned bytes.
+    ///
+    /// At most `max_size` bytes are read. The terminating NUL is consumed but
+    /// not returned; when `max_size` bytes were read without meeting it, they
+    /// are returned as is.
+    pub fn read_string(&mut self, max_size: usize) -> io::Result<Vec<u8>> {
+        let mut bytes = Vec::new();
+        for _ in 0..max_size {
+            match self.read(8)? as u8 {
+                0 => break,
+                byte => bytes.push(byte),
+            }
+        }
+        Ok(bytes)
+    }
+}
+
+/// File table entry: `(unknown_1, unknown_2, checksum, offset, size)`.
+///
+/// `checksum` is the sum of the bytes of the compressed data, `offset` is where
+/// that data starts in the archive, and `size` is the decompressed size.
+type Entry = (u32, u32, u32, u32, u32);
+
+/// Handle PBG3 archive files.
+///
+/// PBG3 is a file archive format used in Touhou 6: EoSD.
+/// This type provides a representation of such files, as well as functions to
+/// read and extract files from a PBG3 archive.
+pub struct PBG3<R: io::Read + io::Seek> {
+    /// File table, mapping each file name to its entry.
+    entries: HashMap<String, Entry>,
+
+    /// Bitstream over the archive, used to read the data of the files.
+    bitstream: PBG3BitStream<R>,
+}
+
+impl<R: io::Read + io::Seek> PBG3<R> {
+    /// Create a PBG3 archive.
+    fn new(entries: HashMap<String, Entry>, bitstream: PBG3BitStream<R>) -> PBG3<R> {
+        PBG3 {
+            entries,
+            bitstream,
+        }
+    }
+
+    /// Open a PBG3 archive.
+    ///
+    /// Checks the `PBG3` magic, reads the number of entries and the offset of
+    /// the file table, then parses the table itself.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the magic is wrong, when a file name is not valid UTF-8, or
+    /// when the underlying reader fails or ends early.
+    pub fn from_file(mut file: R) -> io::Result<PBG3<R>> {
+        let mut magic = [0; 4];
+        // A plain `read()` may legitimately return fewer than four bytes.
+        file.read_exact(&mut magic)?;
+        if &magic != b"PBG3" {
+            return Err(io::Error::new(io::ErrorKind::Other, "Wrong magic!"));
+        }
+
+        let bitstream = BitStream::new(file);
+        let mut bitstream = PBG3BitStream::new(bitstream);
+        let mut entries = HashMap::new();
+
+        let nb_entries = bitstream.read_u32()?;
+        let table_offset = bitstream.read_u32()?;
+        bitstream.seek(io::SeekFrom::Start(table_offset as u64))?;
+
+        for _ in 0..nb_entries {
+            let unknown_1 = bitstream.read_u32()?;
+            let unknown_2 = bitstream.read_u32()?;
+            let checksum = bitstream.read_u32()?; // Checksum of *compressed data*
+            let offset = bitstream.read_u32()?;
+            let size = bitstream.read_u32()?;
+            let name = bitstream.read_string(255)?;
+            // The name comes straight from the file, so it must not be trusted.
+            let name = String::from_utf8(name)
+                .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "File name is not valid UTF-8!"))?;
+            entries.insert(name, (unknown_1, unknown_2, checksum, offset, size));
+        }
+
+        Ok(PBG3::new(entries, bitstream))
+    }
+
+    /// List all file entries in this PBG3 archive.
+    pub fn list_files(&self) -> hash_map::Keys<String, Entry> {
+        self.entries.keys()
+    }
+
+    /// Read a single file from this PBG3 archive.
+    ///
+    /// When `check` is true, the compressed data is read a second time and the
+    /// sum of its bytes is compared against the checksum of the entry.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `filename` is not in the archive, when decompression fails,
+    /// when the checksum does not match, or when the underlying reader fails.
+    pub fn get_file(&mut self, filename: String, check: bool) -> io::Result<Vec<u8>> {
+        // Copy the fields out, so the entry table is no longer borrowed below.
+        let &(_, _, checksum, offset, size) = self.entries.get(&filename)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "No such file in this archive!"))?;
+        self.bitstream.seek(io::SeekFrom::Start(offset as u64))?;
+        let data = lzss::decompress(&mut self.bitstream.bitstream, size as usize, 0x2000, 13, 4, 3)?;
+        if check {
+            // Verify the checksum, computed over the compressed bytes which
+            // have just been consumed.
+            let end = self.bitstream.tell()?;
+            let compressed_size = (end - offset as u64) as usize;
+            self.bitstream.seek(io::SeekFrom::Start(offset as u64))?;
+            // The sum is defined modulo 2^32, so it has to wrap.
+            let value = self.bitstream.read_bytes(compressed_size)?
+                .into_iter()
+                .fold(0u32, |sum, byte| sum.wrapping_add(byte as u32));
+            if value != checksum {
+                return Err(io::Error::new(io::ErrorKind::Other, "Corrupted data!"));
+            }
+        }
+        Ok(data)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+    use std::fs::File;
+
+    /// Location of the EoSD archive read by these tests; it has to exist on
+    /// the machine running them.
+    const MD_DAT: &str = "/home/linkmauve/games/pc/東方/TH06 ~ The Embodiment of Scarlet Devil/MD.DAT";
+
+    /// Open `MD_DAT` and parse its file table, panicking on any failure.
+    fn open_md_dat() -> PBG3<io::BufReader<File>> {
+        let reader = io::BufReader::new(File::open(MD_DAT).unwrap());
+        PBG3::from_file(reader).unwrap()
+    }
+
+    #[test]
+    fn bitstream() {
+        let source = SeekableSlice::new(b"Hello world!\0");
+        let mut stream = PBG3BitStream::new(BitStream::new(source));
+        let string = stream.read_string(42).unwrap();
+        assert_eq!(string, b"Hello world!");
+    }
+
+    #[test]
+    fn file_present() {
+        let archive = open_md_dat();
+        assert!(archive.list_files().any(|name| name == "th06_01.pos"));
+    }
+
+    #[test]
+    fn check_all_files() {
+        let mut archive = open_md_dat();
+        // The names are copied first, as extracting a file needs `&mut`.
+        let names: Vec<String> = archive.list_files().cloned().collect();
+        for name in names {
+            archive.get_file(name, true).unwrap();
+        }
+    }
+}
new file mode 100644
--- /dev/null
+++ b/src/util/bitstream.rs
@@ -0,0 +1,143 @@
+//! Bitstream module.
+
+use std::io;
+
+/// Wrapper around any `Read + Seek` source, to allow bit operations.
+///
+/// Bits are handed out starting from the most significant bit of each byte.
+pub struct BitStream<R: io::Read + io::Seek> {
+    /// Underlying byte source.
+    io: R,
+    /// Number of bits of `byte` which have not been handed out yet.
+    remaining_bits: usize,
+    /// Last byte fetched from `io`; its `remaining_bits` lowest bits are still
+    /// pending.
+    byte: u8,
+}
+
+impl<R: io::Read + io::Seek> BitStream<R> {
+    /// Create a new bitstream.
+    pub fn new(io: R) -> BitStream<R> {
+        BitStream {
+            io,
+            remaining_bits: 0,
+            byte: 0,
+        }
+    }
+
+    /// Seek inside the bitstream, ditching any unused data read.
+    ///
+    /// Returns the new position reported by the underlying source.
+    pub fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        self.remaining_bits = 0;
+        self.byte = 0;
+        self.io.seek(seek_from)
+    }
+
+    /// Fetch the next byte from the underlying source.
+    ///
+    /// Must only be called once every bit of the previous byte has been
+    /// consumed, and panics otherwise.
+    fn fill_byte(&mut self) -> io::Result<()> {
+        assert!(self.remaining_bits == 0);
+
+        let mut buf = [0u8; 1];
+        self.io.read_exact(&mut buf)?;
+        self.byte = buf[0];
+        self.remaining_bits = 8;
+        Ok(())
+    }
+
+    /// Read only one bit from the stream.
+    pub fn read_bit(&mut self) -> io::Result<bool> {
+        if self.remaining_bits == 0 {
+            self.fill_byte()?;
+        }
+        self.remaining_bits -= 1;
+        Ok((self.byte >> self.remaining_bits) & 0x01 != 0)
+    }
+
+    /// Read `nb_bits` bits from the stream.
+    ///
+    /// The first bit read ends up as the most significant bit of the result.
+    /// Reading zero bits returns 0 without touching the stream.
+    ///
+    /// # Errors
+    ///
+    /// Fails when `nb_bits` is larger than the width of `usize`, in which case
+    /// nothing is consumed, or when the underlying source fails or ends.
+    pub fn read(&mut self, nb_bits: usize) -> io::Result<usize> {
+        if nb_bits > 8 * std::mem::size_of::<usize>() {
+            return Err(io::Error::new(io::ErrorKind::InvalidInput, "Too many bits requested!"));
+        }
+
+        let mut pending = nb_bits;
+        let mut value: usize = 0;
+        while pending > 0 {
+            if self.remaining_bits == 0 {
+                self.fill_byte()?;
+            }
+            let taken = pending.min(self.remaining_bits);
+            pending -= taken;
+            self.remaining_bits -= taken;
+            // Mask each chunk on its own: `taken` is at most 8, whereas a mask
+            // built from `nb_bits` would overflow for a full-width read.
+            let chunk = (self.byte as usize >> self.remaining_bits) & ((1 << taken) - 1);
+            value |= chunk << pending;
+        }
+        Ok(value)
+    }
+
+    /// Read a given amount of bytes.
+    ///
+    /// The bytes come straight from the underlying source: bits left over from
+    /// a partially consumed byte are neither returned nor discarded.
+    pub fn read_bytes(&mut self, nb_bytes: usize) -> io::Result<Vec<u8>> {
+        let mut buf = vec![0u8; nb_bytes];
+        self.io.read_exact(&mut buf)?;
+        Ok(buf)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+
+    #[test]
+    fn bit_by_bit() {
+        let data = SeekableSlice::new(&[1, 2, 3]);
+        let mut bitstream = BitStream::new(data);
+
+        // Bits of 1, 2 and 3, in the order they are expected to come out.
+        let expected = [
+            false, false, false, false, false, false, false, true,
+            false, false, false, false, false, false, true, false,
+            false, false, false, false, false, false, true, true,
+        ];
+        for &bit in expected.iter() {
+            assert_eq!(bitstream.read_bit().unwrap(), bit);
+        }
+
+        // Can’t read after the end.
+        bitstream.read_bit().unwrap_err();
+    }
+
+    #[test]
+    fn byte_by_byte() {
+        let data = SeekableSlice::new(&[1, 2, 3]);
+        let mut bitstream = BitStream::new(data);
+
+        for &byte in [1, 2, 3].iter() {
+            assert_eq!(bitstream.read(8).unwrap(), byte);
+        }
+
+        // Can’t read after the end.
+        bitstream.read(1).unwrap_err();
+    }
+
+    #[test]
+    fn unaligned_bytes() {
+        let data = SeekableSlice::new(&[0, 129, 1, 128]);
+        let mut bitstream = BitStream::new(data);
+
+        // Skipping one bit shifts every following byte boundary.
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        for &byte in [1, 2, 3].iter() {
+            assert_eq!(bitstream.read(8).unwrap(), byte);
+        }
+        assert_eq!(bitstream.read(7).unwrap(), 0);
+
+        // Can’t read after the end.
+        bitstream.read(1).unwrap_err();
+    }
+}
new file mode 100644
--- /dev/null
+++ b/src/util/lzss.rs
@@ -0,0 +1,57 @@
+//! LZSS implementation.
+
+use std::io;
+use crate::util::bitstream::BitStream;
+
+/// Decompresses a LZSS-compressed file.
+///
+/// The stream is a sequence of chunks, each introduced by a flag bit: a set
+/// flag is followed by one literal byte, a cleared flag by an `offset_size`
+/// bits wide offset into the dictionary and a `length_size` bits wide length,
+/// to which `minimum_match_length` is added.
+///
+/// The returned buffer is always `size` bytes long; bytes which were not
+/// produced before decoding stopped are left as zeroes.
+///
+/// # Errors
+///
+/// Fails when `dictionary_size` is smaller than 2, when a match would write
+/// past `size` bytes, or when reading from `bitstream` fails.
+pub fn decompress<R: io::Read + io::Seek>(bitstream: &mut BitStream<R>, size: usize, dictionary_size: usize, offset_size: usize, length_size: usize, minimum_match_length: usize) -> io::Result<Vec<u8>> {
+    // The write head starts at 1 and every index is taken modulo
+    // `dictionary_size`, so a smaller dictionary could only panic.
+    if dictionary_size < 2 {
+        return Err(io::Error::new(io::ErrorKind::InvalidInput, "LZSS dictionary must hold at least two bytes!"));
+    }
+
+    let mut data = vec![0; size];
+    let mut dictionary = vec![0; dictionary_size];
+    let mut dictionary_head = 1;
+    let mut ptr = 0;
+
+    while ptr < size {
+        if bitstream.read_bit()? {
+            // The `flag` bit is set, indicating the upcoming chunk of data is a literal.
+            // Add it to the uncompressed file, and store it in the dictionary.
+            let byte = bitstream.read(8)? as u8;
+            dictionary[dictionary_head] = byte;
+            dictionary_head = (dictionary_head + 1) % dictionary_size;
+            data[ptr] = byte;
+            ptr += 1;
+        } else {
+            // The `flag` bit is not set, the upcoming chunk is a (offset, length) tuple.
+            let offset = bitstream.read(offset_size)?;
+            let length = bitstream.read(length_size)? + minimum_match_length;
+            if ptr + length > size {
+                return Err(io::Error::new(io::ErrorKind::Other, "LZSS match overruns the expected output size!"));
+            }
+            // End marker; only reachable when `minimum_match_length` is 0.
+            if offset == 0 && length == 0 {
+                break;
+            }
+            for i in offset..offset + length {
+                // Read the byte once: it goes both to the output and back
+                // into the dictionary.
+                let byte = dictionary[i % dictionary_size];
+                data[ptr] = byte;
+                dictionary[dictionary_head] = byte;
+                dictionary_head = (dictionary_head + 1) % dictionary_size;
+                ptr += 1;
+            }
+        }
+    }
+
+    Ok(data)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+
+    /// Placeholder test, ignored for now.
+    #[test]
+    #[ignore]
+    fn bit_by_bit() {
+        // TODO: find actual lzss data.
+        let source = SeekableSlice::new(&[0, 0, 0]);
+        let mut stream = BitStream::new(source);
+        let result = decompress(&mut stream, 3, 0x2000, 13, 4, 3);
+        result.unwrap();
+    }
+}
new file mode 100644
--- /dev/null
+++ b/src/util/mod.rs
@@ -0,0 +1,50 @@
+//! Module containing a bunch of helper modules.
+
+pub mod bitstream;
+pub mod lzss;
+
+#[cfg(test)]
+use std::io;
+
+#[cfg(test)]
+/// Minimal in-memory `Read + Seek` source over a borrowed byte slice, used by
+/// the tests of this crate.
+pub struct SeekableSlice<'a> {
+    /// Bytes being read.
+    slice: &'a [u8],
+    /// Index of the next byte to read; may be past the end after a seek.
+    cursor: usize,
+}
+
+#[cfg(test)]
+impl SeekableSlice<'_> {
+    /// Wrap `slice`, starting at its first byte.
+    pub fn new(slice: &[u8]) -> SeekableSlice {
+        SeekableSlice {
+            slice,
+            cursor: 0,
+        }
+    }
+}
+
+#[cfg(test)]
+impl io::Read for SeekableSlice<'_> {
+    /// Copy as many bytes as fit in `buf`, returning 0 once the end is reached.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        // Seeking past the end is allowed, so clamp instead of slicing out of
+        // bounds; reading from there simply yields nothing.
+        let start = self.cursor.min(self.slice.len());
+        let mut remaining = &self.slice[start..];
+        let length = io::Read::read(&mut remaining, buf)?;
+        self.cursor += length;
+        Ok(length)
+    }
+}
+
+#[cfg(test)]
+impl io::Seek for SeekableSlice<'_> {
+    /// Move the cursor, returning its new position.
+    ///
+    /// Seeking before the start fails and leaves the cursor untouched.
+    fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        let (base, offset) = match seek_from {
+            io::SeekFrom::Start(offset) => (offset, 0),
+            io::SeekFrom::End(offset) => (self.slice.len() as u64, offset),
+            io::SeekFrom::Current(offset) => (self.cursor as u64, offset),
+        };
+        // Checked arithmetic, so a negative target is reported rather than
+        // wrapping around to a huge cursor.
+        let target = if offset >= 0 {
+            base.checked_add(offset as u64)
+        } else {
+            base.checked_sub(offset.wrapping_neg() as u64)
+        };
+        match target {
+            Some(position) => {
+                self.cursor = position as usize;
+                Ok(self.cursor as u64)
+            }
+            None => Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid seek to a negative or overflowing position")),
+        }
+    }
+}