Mercurial > touhou
changeset 782:a30ce01b9154
formats: Rewrite msg parsing in Rust
| author | Link Mauve <linkmauve@linkmauve.fr> |
|---|---|
| date | Thu, 20 Nov 2025 19:02:19 +0100 |
| parents | 5b43c42fa680 |
| children | ec1e06402a97 |
| files | formats/src/th06/mod.rs formats/src/th06/msg.rs python/src/lib.rs pytouhou/formats/msg.py pytouhou/resource/loader.py |
| diffstat | 5 files changed, 214 insertions(+), 89 deletions(-) [+] |
line wrap: on
line diff
--- a/formats/src/th06/mod.rs +++ b/formats/src/th06/mod.rs @@ -4,3 +4,4 @@ pub mod anm0; pub mod ecl; pub mod std; +pub mod msg;
new file mode 100644 --- /dev/null +++ b/formats/src/th06/msg.rs @@ -0,0 +1,165 @@ +//! MSG format support. + +use encoding_rs::SHIFT_JIS; +use nom::{ + multi::length_count, + number::complete::{le_u16, le_u32, le_u8}, + IResult, Parser, +}; +use std::collections::BTreeMap; + +/// Parse a SHIFT_JIS byte string of length 34 into a String. +#[allow(non_snake_case)] +pub fn le_String(i: &[u8]) -> IResult<&[u8], String> { + let data = i.splitn(2, |c| *c == b'\0').nth(0).unwrap(); + let (string, _encoding, _replaced) = SHIFT_JIS.decode(data); + Ok((b"", string.into_owned())) +} + +/// A single instruction, part of a `Script`. +#[derive(Debug, Clone)] +pub struct Call { + /// Time at which this instruction will be called. + pub time: u16, + + /// The instruction to call. + pub instr: Instruction, +} + +/// Main struct of the MSG format. +#[derive(Debug, Clone)] +pub struct Msg { + /// Map of indices to scripts in this msg. + pub scripts: BTreeMap<u8, Vec<Call>>, +} + +impl Msg { + /// Parse a slice of bytes into a `Msg` struct. + pub fn from_slice(data: &[u8]) -> IResult<&[u8], Msg> { + parse_msg.parse(data) + } +} + +macro_rules! gen_match { + ($arg_type:ident) => { + ${concat(le_, $arg_type)} + }; +} + +macro_rules! declare_msg_instructions { + ($($opcode:tt => fn $name:ident($($arg:ident: $arg_type:ident),*)),*,) => { + /// Available instructions in a `Msg`. + #[allow(missing_docs)] + #[derive(Debug, Clone, PartialEq)] + pub enum Instruction { + $( + $name($($arg_type),*) + ),* + } + + fn parse_instruction_args(mut i: &[u8], opcode: u8) -> IResult<&[u8], Instruction> { + let instr = match opcode { + $( + $opcode => { + $( + let (i2, $arg) = gen_match!($arg_type)(i)?; + i = i2; + )* + Instruction::$name($($arg),*) + } + )* + // XXX: use a more specific error instead. + _ => return Err(nom::Err::Failure(nom::error::Error::new(i, nom::error::ErrorKind::Eof))) + }; + Ok((i, instr)) + } + }; +} + +declare_msg_instructions! 
{ + 0 => fn Unk1(), + 1 => fn Enter(side: u16, effect: u16), + 2 => fn ChangeFace(side: u16, index: u16), + 3 => fn DisplayText(side: u16, index: u16, text: String), + 4 => fn Pause(duration: u32), + 5 => fn Animate(side: u16, effect: u16), + 6 => fn SpawnEnemySprite(), + 7 => fn ChangeMusic(track: u32), + 8 => fn DisplayDescription(side: u16, index: u16, text: String), + 9 => fn ShowScores(unk1: u32), + 10 => fn Freeze(), + 11 => fn NextStage(), + 12 => fn Unk2(), + 13 => fn SetAllowSkip(boolean: u32), + 14 => fn Unk3(), +} + +fn parse_msg(input: &[u8]) -> IResult<&[u8], Msg> { + let (mut i, entry_offsets) = length_count(le_u32, le_u32).parse(input)?; + let first_offset = entry_offsets[0]; + + let mut scripts = BTreeMap::new(); + for (index, offset) in entry_offsets + .into_iter() + .enumerate() + .map(|(index, offset)| (index as u8, offset)) + { + if input.len() < offset as usize { + return Err(nom::Err::Failure(nom::error::Error::new( + input, + nom::error::ErrorKind::Eof, + ))); + } + + // In EoSD, Reimu’s scripts start at 0, and Marisa’s ones at 10. + // If Reimu has less than 10 scripts, the remaining offsets are equal to her first. 
+ if index > 0 && offset == first_offset { + continue; + } + + i = &input[offset as usize..]; + let mut instructions = Vec::new(); + loop { + let (i2, (time, opcode, size)) = (le_u16, le_u8, le_u8).parse(i)?; + if time == 0 && opcode == 0 && size == 0 { + break; + } + let (i2, data) = (&i2[size as usize..], &i2[..size as usize]); + let (empty, instr) = parse_instruction_args(data, opcode)?; + assert!(empty.is_empty()); + instructions.push(Call { time, instr }); + i = i2; + } + scripts.insert(index, instructions); + } + + Ok((i, Msg { scripts })) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::io::{self, Read}; + + #[test] + fn msg() { + println!("{}", std::env::current_dir().unwrap().display()); + let file = File::open("EoSD/ST/msg1.dat").unwrap(); + let mut file = io::BufReader::new(file); + let mut buf = Vec::new(); + file.read_to_end(&mut buf).unwrap(); + let (_, msg) = Msg::from_slice(&buf).unwrap(); + assert_eq!(msg.scripts.len(), 4); + assert_eq!(msg.scripts[&0].len(), 89); + assert_eq!(msg.scripts[&1].len(), 13); + assert_eq!(msg.scripts[&10].len(), 58); + assert_eq!(msg.scripts[&11].len(), 13); + let script = &msg.scripts[&0]; + assert_eq!(script[3].time, 60); + assert_eq!( + script[3].instr, + Instruction::DisplayText(0, 0, String::from("久々のお仕事だわ。")) + ); + } +}
--- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -3,7 +3,8 @@ use pyo3::types::{PyBytes, PyTuple}; use touhou_formats::th06::pbg3; use touhou_formats::th06::std as stage; -use std::collections::HashMap; +use touhou_formats::th06::msg; +use std::collections::{BTreeMap, HashMap}; use std::fs::File; use std::io::BufReader; use std::path::PathBuf; @@ -75,6 +76,42 @@ } } +#[pyclass(module = "libtouhou")] +struct PyMsg { + inner: msg::Msg, +} + +#[pymethods] +impl PyMsg { + #[getter] + fn msgs(&self, py: Python) -> BTreeMap<u8, Vec<(u16, u8, Py<PyTuple>)>> { + fn call_to_python(py: Python, call: &msg::Call) -> PyResult<(u16, u8, Py<PyTuple>)> { + let (opcode, args) = match &call.instr { + msg::Instruction::Unk1() => (0, ().into_pyobject(py)?), + msg::Instruction::Enter(side, effect) => (1, (side, effect).into_pyobject(py)?), + msg::Instruction::ChangeFace(side, index) => (2, (side, index).into_pyobject(py)?), + msg::Instruction::DisplayText(side, index, text) => (3, (side, index, text).into_pyobject(py)?), + msg::Instruction::Pause(duration) => (4, (duration,).into_pyobject(py)?), + msg::Instruction::Animate(side, effect) => (5, (side, effect).into_pyobject(py)?), + msg::Instruction::SpawnEnemySprite() => (6, ().into_pyobject(py)?), + msg::Instruction::ChangeMusic(track) => (7, (track,).into_pyobject(py)?), + msg::Instruction::DisplayDescription(side, index, text) => (8, (side, index, text).into_pyobject(py)?), + msg::Instruction::ShowScores(unk1) => (9, (unk1,).into_pyobject(py)?), + msg::Instruction::Freeze() => (10, ().into_pyobject(py)?), + msg::Instruction::NextStage() => (11, ().into_pyobject(py)?), + msg::Instruction::Unk2() => (12, ().into_pyobject(py)?), + msg::Instruction::SetAllowSkip(boolean) => (13, (boolean,).into_pyobject(py)?), + msg::Instruction::Unk3() => (14, ().into_pyobject(py)?), + }; + Ok((call.time, opcode, args.unbind())) + } + self.inner.scripts.iter().map(|(index, script)| ( + *index, + script.into_iter().map(|call| call_to_python(py, 
call).unwrap()).collect(), + )).collect() + } +} + /// A loader for Touhou files. #[pyclass(module = "libtouhou", subclass)] #[derive(Default)] @@ -146,8 +183,8 @@ } /// Return the given file as an io.BytesIO object. - fn get_file(&self, py: Python, name: String) -> PyResult<Py<PyAny>> { - let vec = self.get_file_internal(&name)?; + fn get_file(&self, py: Python, name: &str) -> PyResult<Py<PyAny>> { + let vec = self.get_file_internal(name)?; let bytes = PyBytes::new(py, &vec); let io = py.import("io")?; let bytesio_class = io.dict().get_item("BytesIO")?.unwrap(); @@ -155,11 +192,17 @@ Ok(file.unbind()) } - fn get_stage(&self, py: Python, name: String) -> PyResult<Py<PyStage>> { - let vec = self.get_file_internal(&name)?; + fn get_stage(&self, py: Python, name: &str) -> PyResult<Py<PyStage>> { + let vec = self.get_file_internal(name)?; let (_, inner) = stage::Stage::from_slice(&vec).unwrap(); Ok(Py::new(py, PyStage { inner })?) } + + fn get_msg(&self, py: Python, name: &str) -> PyResult<Py<PyMsg>> { + let vec = self.get_file_internal(name)?; + let (_, inner) = msg::Msg::from_slice(&vec).unwrap(); + Ok(Py::new(py, PyMsg { inner })?) + } } #[pymodule]
deleted file mode 100644 --- a/pytouhou/formats/msg.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- encoding: utf-8 -*- -## -## Copyright (C) 2011 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published -## by the Free Software Foundation; version 3 only. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## - -from struct import pack, unpack, calcsize - -from pytouhou.utils.helpers import get_logger - -logger = get_logger(__name__) - -class MSG: - _instructions = {0: ('', None), - 1: ('hh', None), - 2: ('hh', 'change_face'), - 3: ('hhs', 'display_dialog_line'), - 4: ('I', 'pause'), - 5: ('hh', 'switch'), - 6: ('', 'add_enemy_sprite'), - 7: ('I', 'change_music'), - 8: ('hhs', 'display_character_line'), - 9: ('I', 'show_scores'), - 10: ('', 'freeze'), - 11: ('', 'next_level'), - 12: ('', None), - 13: ('I', None), - 14: ('', None)} #TODO - - - def __init__(self): - self.msgs = {} - - - @classmethod - def read(cls, file): - entry_count, = unpack('<I', file.read(4)) - entry_offsets = unpack('<%dI' % entry_count, file.read(4 * entry_count)) - - msg = cls() - msg.msgs = {} - - for i, offset in enumerate(entry_offsets): - if msg.msgs and offset == entry_offsets[0]: # In EoSD, Reimu’s scripts start at 0, and Marisa’s ones at 10. - continue # If Reimu has less than 10 scripts, the remaining offsets are equal to her first. 
- - msg.msgs[i] = [] - file.seek(offset) - - while True: - time, opcode, size = unpack('<HBB', file.read(4)) - if time == 0 and opcode == 0: - break - data = file.read(size) - if opcode in cls._instructions: - fmt = '<%s' % cls._instructions[opcode][0] - if fmt.endswith('s'): - fmt = fmt[:-1] - fmt = '%s%ds' % (fmt, size - calcsize(fmt)) - args = unpack(fmt, data) - if fmt.endswith('s'): - args = args[:-1] + (args[-1].decode('shift_jis'),) - else: - args = (data, ) - logger.warning('unknown msg opcode %d', opcode) - - msg.msgs[i].append((time, opcode, args)) - - - return msg -
--- a/pytouhou/resource/loader.py +++ b/pytouhou/resource/loader.py @@ -48,11 +48,6 @@ return ECL.read(file) #TODO: modular - def get_msg(self, name): - file = self.get_file(name) - return MSG.read(file) #TODO: modular - - def get_sht(self, name): file = self.get_file(name) return SHT.read(file) #TODO: modular
