# HG changeset patch
# User Emmanuel Gil Peyrot
# Date 1562164032 -7200
# Node ID afa012bb80211f5fffbea53cd8b5e29a11406397
# Parent  4fa0a8e7d941de5f06ac6e19acf4c8ef37cb9a8c
Hello Rust!

diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/target
+**/*.rs.bk
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "touhou"
+version = "0.1.0"
+authors = ["Emmanuel Gil Peyrot "]
+edition = "2018"
+description = "A collection of tools to work with Touhou data"
+homepage = "https://pytouhou.linkmauve.fr"
+license = "GPL-3.0-or-later"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,6 @@
+#![deny(missing_docs)]
+
+//! Crate implementing various Touhou formats.
+
+pub mod util;
+pub mod th06;
diff --git a/src/th06/mod.rs b/src/th06/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/th06/mod.rs
@@ -0,0 +1,3 @@
+//! Touhou 6: EoSD implementation.
+
+pub mod pbg3;
diff --git a/src/th06/pbg3.rs b/src/th06/pbg3.rs
new file mode 100644
--- /dev/null
+++ b/src/th06/pbg3.rs
@@ -0,0 +1,190 @@
+//! PBG3 archive files handling.
+//!
+//! This module provides classes for handling the PBG3 file format.
+//! The PBG3 format is the archive format used by Touhou 6: EoSD.
+//!
+//! PBG3 files are merely a bitstream composed of a header, a file
+//! table, and LZSS-compressed files.
+
+use crate::util::bitstream::BitStream;
+use crate::util::lzss;
+use std::io;
+use std::collections::hash_map::{self, HashMap};
+
+/// Helper struct to handle strings and integers in PBG3 bitstreams.
+pub struct PBG3BitStream<R: io::Read + io::Seek> {
+    bitstream: BitStream<R>,
+}
+
+impl<R: io::Read + io::Seek> PBG3BitStream<R> {
+    /// Create a bitstream capable of reading u32 and strings.
+    pub fn new(bitstream: BitStream<R>) -> PBG3BitStream<R> {
+        PBG3BitStream {
+            bitstream,
+        }
+    }
+
+    /// Seek inside the bitstream, ditching any unused data read.
+    pub fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        self.bitstream.seek(seek_from)
+    }
+
+    /// Return the current position in the stream.
+    pub fn tell(&mut self) -> io::Result<u64> {
+        self.bitstream.seek(io::SeekFrom::Current(0))
+    }
+
+    /// Read a given amount of bits.
+    pub fn read(&mut self, nb_bits: usize) -> io::Result<usize> {
+        self.bitstream.read(nb_bits)
+    }
+
+    /// Read a given amount of bytes.
+    pub fn read_bytes(&mut self, nb_bytes: usize) -> io::Result<Vec<u8>> {
+        self.bitstream.read_bytes(nb_bytes)
+    }
+
+    /// Read an integer from the bitstream.
+    ///
+    /// Integers have variable sizes. They begin with a two-bit value indicating
+    /// the number of (non-aligned) bytes to read.
+    pub fn read_u32(&mut self) -> io::Result<u32> {
+        let size = self.read(2)?;
+        Ok(self.read((size + 1) * 8)? as u32)
+    }
+
+    /// Read a string from the bitstream.
+    ///
+    /// Strings are stored as NULL-terminated sequences of bytes.
+    /// The only catch is that they are not byte-aligned.
+    pub fn read_string(&mut self, mut max_size: usize) -> io::Result<Vec<u8>> {
+        let mut buf = Vec::new();
+        while max_size > 0 {
+            let byte = self.read(8)? as u8;
+            if byte == 0 {
+                break;
+            }
+            buf.push(byte);
+            max_size -= 1;
+        }
+        Ok(buf)
+    }
+}
+
+type Entry = (u32, u32, u32, u32, u32);
+
+/// Handle PBG3 archive files.
+///
+/// PBG3 is a file archive format used in Touhou 6: EoSD.
+/// This class provides a representation of such files, as well as functions to
+/// read and extract files from a PBG3 archive.
+pub struct PBG3<R: io::Read + io::Seek> {
+    /// List of PBG3Entry objects describing files present in the archive.
+    entries: HashMap<String, Entry>,
+
+    /// PBG3BitStream struct.
+    bitstream: PBG3BitStream<R>,
+}
+
+impl<R: io::Read + io::Seek> PBG3<R> {
+    /// Create a PBG3 archive.
+    fn new(entries: HashMap<String, Entry>, bitstream: PBG3BitStream<R>) -> PBG3<R> {
+        PBG3 {
+            entries,
+            bitstream,
+        }
+    }
+
+    /// Open a PBG3 archive.
+    pub fn from_file(mut file: R) -> io::Result<PBG3<R>> {
+        let mut magic = [0; 4];
+        file.read(&mut magic)?;
+        if &magic != b"PBG3" {
+            return Err(io::Error::new(io::ErrorKind::Other, "Wrong magic!"));
+        }
+
+        let bitstream = BitStream::new(file);
+        let mut bitstream = PBG3BitStream::new(bitstream);
+        let mut entries = HashMap::new();
+
+        let nb_entries = bitstream.read_u32()?;
+        let offset = bitstream.read_u32()?;
+        bitstream.seek(io::SeekFrom::Start(offset as u64))?;
+
+        for _ in 0..nb_entries {
+            let unknown_1 = bitstream.read_u32()?;
+            let unknown_2 = bitstream.read_u32()?;
+            let checksum = bitstream.read_u32()?; // Checksum of *compressed data*
+            let offset = bitstream.read_u32()?;
+            let size = bitstream.read_u32()?;
+            let name = bitstream.read_string(255)?;
+            // XXX: no unwrap!
+            let name = String::from_utf8(name).unwrap();
+            entries.insert(name, (unknown_1, unknown_2, checksum, offset, size));
+        }
+
+        Ok(PBG3::new(entries, bitstream))
+    }
+
+    /// List all file entries in this PBG3 archive.
+    pub fn list_files(&self) -> hash_map::Keys<String, Entry> {
+        self.entries.keys()
+    }
+
+    /// Read a single file from this PBG3 archive.
+    pub fn get_file(&mut self, filename: String, check: bool) -> io::Result<Vec<u8>> {
+        // XXX: no unwrap!
+        let (_unknown_1, _unknown_2, checksum, offset, size) = self.entries.get(&filename).unwrap();
+        self.bitstream.seek(io::SeekFrom::Start(*offset as u64))?;
+        let data = lzss::decompress(&mut self.bitstream.bitstream, *size as usize, 0x2000, 13, 4, 3)?;
+        if check {
+            // Verify the checksum.
+            let compressed_size = self.bitstream.tell()? as u32 - *offset;
+            self.bitstream.seek(io::SeekFrom::Start(*offset as u64))?;
+            let mut value: u32 = 0;
+            for c in self.bitstream.read_bytes(compressed_size as usize)? {
+                value += c as u32;
+                value &= 0xffffffff;
+            }
+            if value != *checksum {
+                return Err(io::Error::new(io::ErrorKind::Other, "Corrupted data!"));
+            }
+        }
+        Ok(data)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+    use std::fs::File;
+
+    #[test]
+    fn bitstream() {
+        let data = SeekableSlice::new(b"Hello world!\0");
+        let bitstream = BitStream::new(data);
+        let mut pbg3 = PBG3BitStream::new(bitstream);
+        assert_eq!(pbg3.read_string(42).unwrap(), b"Hello world!");
+    }
+
+    #[test]
+    fn file_present() {
+        let file = File::open("/home/linkmauve/games/pc/東方/TH06 ~ The Embodiment of Scarlet Devil/MD.DAT").unwrap();
+        let file = io::BufReader::new(file);
+        let pbg3 = PBG3::from_file(file).unwrap();
+        let files = pbg3.list_files().cloned().collect::<Vec<String>>();
+        assert!(files.contains(&String::from("th06_01.pos")));
+    }
+
+    #[test]
+    fn check_all_files() {
+        let file = File::open("/home/linkmauve/games/pc/東方/TH06 ~ The Embodiment of Scarlet Devil/MD.DAT").unwrap();
+        let file = io::BufReader::new(file);
+        let mut pbg3 = PBG3::from_file(file).unwrap();
+        let files = pbg3.list_files().cloned().collect::<Vec<String>>();
+        for filename in files {
+            pbg3.get_file(filename, true).unwrap();
+        }
+    }
+}
diff --git a/src/util/bitstream.rs b/src/util/bitstream.rs
new file mode 100644
--- /dev/null
+++ b/src/util/bitstream.rs
@@ -0,0 +1,143 @@
+//! Bitstream module.
+
+use std::io;
+
+/// Wrapper around any `Read` trait, to allow bit operations.
+pub struct BitStream<R: io::Read + io::Seek> {
+    io: R,
+    remaining_bits: usize,
+    byte: u8,
+}
+
+impl<R: io::Read + io::Seek> BitStream<R> {
+    /// Create a new bitstream.
+    pub fn new(io: R) -> BitStream<R> {
+        BitStream {
+            io,
+            remaining_bits: 0,
+            byte: 0,
+        }
+    }
+
+    /// Seek inside the bitstream, ditching any unused data read.
+    pub fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        self.remaining_bits = 0;
+        self.byte = 0;
+        self.io.seek(seek_from)
+    }
+
+    fn fill_byte(&mut self) -> io::Result<()> {
+        assert!(self.remaining_bits == 0);
+
+        let mut buf = [0u8; 1];
+        self.io.read_exact(&mut buf)?;
+        self.byte = buf[0];
+        self.remaining_bits = 8;
+        Ok(())
+    }
+
+    /// Read only one bit from the stream.
+    pub fn read_bit(&mut self) -> io::Result<bool> {
+        if self.remaining_bits == 0 {
+            self.fill_byte()?;
+        }
+        self.remaining_bits -= 1;
+        Ok((self.byte >> self.remaining_bits) & 0x01 != 0)
+    }
+
+    /// Read `nb_bits` bits from the stream.
+    pub fn read(&mut self, nb_bits: usize) -> io::Result<usize> {
+        let mut nb_bits2 = nb_bits;
+        let mut value: usize = 0;
+        while nb_bits2 > 0 {
+            if self.remaining_bits == 0 {
+                self.fill_byte()?;
+            }
+            let read = if nb_bits2 > self.remaining_bits { self.remaining_bits } else { nb_bits2 };
+            nb_bits2 -= read;
+            self.remaining_bits -= read;
+            value |= (self.byte as usize >> self.remaining_bits) << nb_bits2;
+        }
+        Ok(value & ((1 << nb_bits) - 1))
+    }
+
+    /// Read a given amount of bytes.
+    pub fn read_bytes(&mut self, nb_bytes: usize) -> io::Result<Vec<u8>> {
+        let mut buf = vec![0u8; nb_bytes];
+        self.io.read_exact(&mut buf)?;
+        Ok(buf)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+
+    #[test]
+    fn bit_by_bit() {
+        let data = SeekableSlice::new(&[1, 2, 3]);
+        let mut bitstream = BitStream::new(data);
+
+        // 1
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), true);
+
+        // 2
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), true);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+
+        // 3
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read_bit().unwrap(), true);
+        assert_eq!(bitstream.read_bit().unwrap(), true);
+
+        // Can’t read after the end.
+        bitstream.read_bit().unwrap_err();
+    }
+
+    #[test]
+    fn byte_by_byte() {
+        let data = SeekableSlice::new(&[1, 2, 3]);
+        let mut bitstream = BitStream::new(data);
+
+        assert_eq!(bitstream.read(8).unwrap(), 1);
+        assert_eq!(bitstream.read(8).unwrap(), 2);
+        assert_eq!(bitstream.read(8).unwrap(), 3);
+
+        // Can’t read after the end.
+        bitstream.read(1).unwrap_err();
+    }
+
+    #[test]
+    fn unaligned_bytes() {
+        let data = SeekableSlice::new(&[0, 129, 1, 128]);
+        let mut bitstream = BitStream::new(data);
+
+        assert_eq!(bitstream.read_bit().unwrap(), false);
+        assert_eq!(bitstream.read(8).unwrap(), 1);
+        assert_eq!(bitstream.read(8).unwrap(), 2);
+        assert_eq!(bitstream.read(8).unwrap(), 3);
+        assert_eq!(bitstream.read(7).unwrap(), 0);
+
+        // Can’t read after the end.
+        bitstream.read(1).unwrap_err();
+    }
+}
diff --git a/src/util/lzss.rs b/src/util/lzss.rs
new file mode 100644
--- /dev/null
+++ b/src/util/lzss.rs
@@ -0,0 +1,57 @@
+//! LZSS implementation.
+
+use std::io;
+use crate::util::bitstream::BitStream;
+
+/// Decompresses a LZSS-compressed file.
+pub fn decompress<R: io::Read + io::Seek>(bitstream: &mut BitStream<R>, size: usize, dictionary_size: usize, offset_size: usize, length_size: usize, minimum_match_length: usize) -> io::Result<Vec<u8>> {
+    let mut data = vec![0; size];
+    let mut dictionary = vec![0; dictionary_size];
+    let mut dictionary_head = 1;
+    let mut ptr = 0;
+
+    while ptr < size {
+        if bitstream.read_bit()? {
+            // The `flag` bit is set, indicating the upcoming chunk of data is a literal.
+            // Add it to the uncompressed file, and store it in the dictionary.
+            let byte = bitstream.read(8)? as u8;
+            dictionary[dictionary_head] = byte;
+            dictionary_head = (dictionary_head + 1) % dictionary_size;
+            data[ptr] = byte;
+            ptr += 1;
+        } else {
+            // The `flag` bit is not set, the upcoming chunk is a (offset, length) tuple.
+            let offset = bitstream.read(offset_size)?;
+            let length = bitstream.read(length_size)? + minimum_match_length;
+            if ptr + length > size {
+                return Err(io::Error::new(io::ErrorKind::Other, "Oh no!"));
+            }
+            if offset == 0 && length == 0 {
+                break;
+            }
+            for i in offset..offset + length {
+                data[ptr] = dictionary[i % dictionary_size];
+                dictionary[dictionary_head] = dictionary[i % dictionary_size];
+                dictionary_head = (dictionary_head + 1) % dictionary_size;
+                ptr += 1;
+            }
+        }
+    }
+
+    Ok(data)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::util::SeekableSlice;
+
+    #[test]
+    #[ignore]
+    fn bit_by_bit() {
+        // TODO: find actual lzss data.
+        let data = SeekableSlice::new(&[0, 0, 0]);
+        let mut bitstream = BitStream::new(data);
+        decompress(&mut bitstream, 3, 0x2000, 13, 4, 3).unwrap();
+    }
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
new file mode 100644
--- /dev/null
+++ b/src/util/mod.rs
@@ -0,0 +1,50 @@
+//! Module containing a bunch of helper modules.
+
+pub mod bitstream;
+pub mod lzss;
+
+#[cfg(test)]
+use std::io;
+
+#[cfg(test)]
+pub struct SeekableSlice<'a> {
+    slice: &'a [u8],
+    cursor: usize,
+}
+
+#[cfg(test)]
+impl SeekableSlice<'_> {
+    pub fn new(slice: &[u8]) -> SeekableSlice {
+        SeekableSlice {
+            slice,
+            cursor: 0,
+        }
+    }
+}
+
+#[cfg(test)]
+impl io::Read for SeekableSlice<'_> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let length = (&self.slice[self.cursor..]).read(buf)?;
+        self.cursor += length;
+        Ok(length)
+    }
+}
+
+#[cfg(test)]
+impl io::Seek for SeekableSlice<'_> {
+    fn seek(&mut self, seek_from: io::SeekFrom) -> io::Result<u64> {
+        match seek_from {
+            io::SeekFrom::Start(offset) => {
+                self.cursor = offset as usize;
+            }
+            io::SeekFrom::End(offset) => {
+                self.cursor = (self.slice.len() as i64 + offset) as usize;
+            }
+            io::SeekFrom::Current(offset) => {
+                self.cursor = (self.cursor as i64 + offset) as usize;
+            }
+        }
+        Ok(self.cursor as u64)
+    }
+}