changeset 782:a30ce01b9154

formats: Rewrite msg parsing in Rust
author Link Mauve <linkmauve@linkmauve.fr>
date Thu, 20 Nov 2025 19:02:19 +0100
parents 5b43c42fa680
children ec1e06402a97
files formats/src/th06/mod.rs formats/src/th06/msg.rs python/src/lib.rs pytouhou/formats/msg.py pytouhou/resource/loader.py
diffstat 5 files changed, 214 insertions(+), 89 deletions(-) [+]
line wrap: on
line diff
--- a/formats/src/th06/mod.rs
+++ b/formats/src/th06/mod.rs
@@ -4,3 +4,4 @@
 pub mod anm0;
 pub mod ecl;
 pub mod std;
+pub mod msg;
new file mode 100644
--- /dev/null
+++ b/formats/src/th06/msg.rs
@@ -0,0 +1,165 @@
+//! MSG format support.
+
+use encoding_rs::SHIFT_JIS;
+use nom::{
+    multi::length_count,
+    number::complete::{le_u16, le_u32, le_u8},
+    IResult, Parser,
+};
+use std::collections::BTreeMap;
+
+/// Parse a SHIFT_JIS byte string, up to its first NUL if any, consuming the whole input.
+#[allow(non_snake_case)] // Named after its output type so `gen_match!` can build `le_String`.
+pub fn le_String(i: &[u8]) -> IResult<&[u8], String> {
+    let data = i.split(|c| *c == b'\0').next().unwrap_or(i); // Bytes before the first NUL.
+    let (string, _encoding, _replaced) = SHIFT_JIS.decode(data);
+    Ok((b"", string.into_owned()))
+}
+
+/// A single timed instruction; a script is a `Vec<Call>` (see `Msg::scripts`).
+#[derive(Debug, Clone)]
+pub struct Call {
+    /// Time at which this instruction will be called, read as a little-endian `u16`.
+    pub time: u16,
+
+    /// The instruction to call, together with its decoded arguments.
+    pub instr: Instruction,
+}
+
+/// Main struct of the MSG format: the scripts of one parsed file.
+#[derive(Debug, Clone)]
+pub struct Msg {
+    /// Scripts of this msg, keyed by their position in the file's offset table.
+    pub scripts: BTreeMap<u8, Vec<Call>>,
+}
+
+impl Msg {
+    /// Parse the bytes of a MSG file, returning the `Msg` and the input `parse_msg` left over.
+    pub fn from_slice(data: &[u8]) -> IResult<&[u8], Msg> {
+        parse_msg(data)
+    }
+}
+
+macro_rules! gen_match { // Maps an argument type to its parser name, e.g. `u16` to `le_u16`.
+    ($arg_type:ident) => {
+        ${concat(le_, $arg_type)} // NOTE(review): looks like the unstable `macro_metavar_expr_concat` syntax — confirm the feature gate.
+    };
+}
+
+macro_rules! declare_msg_instructions { // Generates `Instruction` and `parse_instruction_args`.
+    ($($opcode:tt => fn $name:ident($($arg:ident: $arg_type:ident),*)),*,) => {
+        /// Available instructions in a `Msg`, one variant per declared opcode.
+        #[allow(missing_docs)]
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum Instruction {
+            $(
+                $name($($arg_type),*)
+            ),*
+        }
+
+        fn parse_instruction_args(mut i: &[u8], opcode: u8) -> IResult<&[u8], Instruction> {
+            let instr = match opcode {
+                $(
+                    $opcode => {
+                        $(
+                            let (i2, $arg) = gen_match!($arg_type)(i)?; // Arguments are read in declaration order.
+                            i = i2;
+                        )*
+                        Instruction::$name($($arg),*)
+                    }
+                )*
+                // XXX: unknown opcodes are reported as `Eof`; use a more specific error instead.
+                _ => return Err(nom::Err::Failure(nom::error::Error::new(i, nom::error::ErrorKind::Eof)))
+            };
+            Ok((i, instr))
+        }
+    };
+}
+
+declare_msg_instructions! { // Each entry reads: opcode => fn Variant(argument: type, ...).
+    0 => fn Unk1(),
+    1 => fn Enter(side: u16, effect: u16),
+    2 => fn ChangeFace(side: u16, index: u16),
+    3 => fn DisplayText(side: u16, index: u16, text: String),
+    4 => fn Pause(duration: u32),
+    5 => fn Animate(side: u16, effect: u16),
+    6 => fn SpawnEnemySprite(),
+    7 => fn ChangeMusic(track: u32),
+    8 => fn DisplayDescription(side: u16, index: u16, text: String),
+    9 => fn ShowScores(unk1: u32),
+    10 => fn Freeze(),
+    11 => fn NextStage(),
+    12 => fn Unk2(),
+    13 => fn SetAllowSkip(boolean: u32),
+    14 => fn Unk3(),
+}
+
+/// Parse a MSG file: a `u32` count, as many `u32` script offsets, then the scripts themselves.
+/// Truncated or inconsistent data yields a nom error instead of a panic.
+fn parse_msg(input: &[u8]) -> IResult<&[u8], Msg> {
+    let failure = |at, kind| nom::Err::Failure(nom::error::Error::new(at, kind));
+    let (mut i, entry_offsets) = length_count(le_u32, le_u32).parse(input)?;
+    let first_offset = entry_offsets.first().copied(); // `None` when the table is empty.
+
+    let mut scripts = BTreeMap::new();
+    for (index, offset) in entry_offsets.into_iter().enumerate() {
+        let Some(script) = input.get(offset as usize..) else {
+            return Err(failure(input, nom::error::ErrorKind::Eof));
+        };
+        // In EoSD, Reimu’s scripts start at 0, and Marisa’s ones at 10.
+        // If Reimu has fewer than 10 scripts, the remaining offsets are equal to her first.
+        if index > 0 && Some(offset) == first_offset {
+            continue;
+        }
+        // Scripts are keyed by `u8`: reject an index that does not fit rather than wrap it.
+        let Ok(index) = u8::try_from(index) else {
+            return Err(failure(input, nom::error::ErrorKind::TooLarge));
+        };
+        i = script;
+        let mut instructions = Vec::new();
+        loop {
+            let (body, (time, opcode, size)) = (le_u16, le_u8, le_u8).parse(i)?;
+            if time == 0 && opcode == 0 && size == 0 {
+                break;
+            }
+            let (next, data) = nom::bytes::complete::take(size).parse(body)?;
+            let (rest, instr) = parse_instruction_args(data, opcode)?;
+            if !rest.is_empty() {
+                return Err(failure(rest, nom::error::ErrorKind::LengthValue));
+            }
+            instructions.push(Call { time, instr });
+            i = next;
+        }
+        scripts.insert(index, instructions);
+    }
+
+    Ok((i, Msg { scripts }))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::{env, fs};
+
+    /// Checks the scripts parsed from `EoSD/ST/msg1.dat` (relative to the working directory,
+    /// which is printed first) against known values.
+    #[test]
+    fn msg() {
+        let cwd = env::current_dir().unwrap();
+        println!("{}", cwd.display());
+        let bytes = fs::read("EoSD/ST/msg1.dat").unwrap();
+        let (_, msg) = Msg::from_slice(&bytes).unwrap();
+
+        // Expected number of calls for each script index present in the file.
+        assert_eq!(msg.scripts.len(), 4);
+        for (index, calls) in [(0u8, 89), (1, 13), (10, 58), (11, 13)] {
+            assert_eq!(msg.scripts[&index].len(), calls);
+        }
+
+        // Fourth call of script 0.
+        let Call { time, instr } = &msg.scripts[&0][3];
+        assert_eq!(*time, 60);
+        let expected = Instruction::DisplayText(0, 0, String::from("久々のお仕事だわ。"));
+        assert_eq!(*instr, expected);
+    }
+}
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -3,7 +3,8 @@
 use pyo3::types::{PyBytes, PyTuple};
 use touhou_formats::th06::pbg3;
 use touhou_formats::th06::std as stage;
-use std::collections::HashMap;
+use touhou_formats::th06::msg;
+use std::collections::{BTreeMap, HashMap};
 use std::fs::File;
 use std::io::BufReader;
 use std::path::PathBuf;
@@ -75,6 +76,42 @@
     }
 }
 
+#[pyclass(module = "libtouhou")]
+struct PyMsg { // Python-visible wrapper around a parsed `msg::Msg`.
+    inner: msg::Msg,
+}
+
+#[pymethods]
+impl PyMsg {
+    #[getter] // Read from Python as the `msgs` attribute: script index -> list of calls.
+    fn msgs(&self, py: Python) -> BTreeMap<u8, Vec<(u16, u8, Py<PyTuple>)>> {
+        // Turns a call into `(time, opcode, args)`; opcodes must mirror `declare_msg_instructions!`.
+        fn call_to_python(py: Python, call: &msg::Call) -> PyResult<(u16, u8, Py<PyTuple>)> {
+            let (opcode, args) = match &call.instr {
+                msg::Instruction::Unk1() => (0, ().into_pyobject(py)?),
+                msg::Instruction::Enter(side, effect) => (1, (side, effect).into_pyobject(py)?),
+                msg::Instruction::ChangeFace(side, index) => (2, (side, index).into_pyobject(py)?),
+                msg::Instruction::DisplayText(side, index, text) => (3, (side, index, text).into_pyobject(py)?),
+                msg::Instruction::Pause(duration) => (4, (duration,).into_pyobject(py)?),
+                msg::Instruction::Animate(side, effect) => (5, (side, effect).into_pyobject(py)?),
+                msg::Instruction::SpawnEnemySprite() => (6, ().into_pyobject(py)?),
+                msg::Instruction::ChangeMusic(track) => (7, (track,).into_pyobject(py)?),
+                msg::Instruction::DisplayDescription(side, index, text) => (8, (side, index, text).into_pyobject(py)?),
+                msg::Instruction::ShowScores(unk1) => (9, (unk1,).into_pyobject(py)?),
+                msg::Instruction::Freeze() => (10, ().into_pyobject(py)?),
+                msg::Instruction::NextStage() => (11, ().into_pyobject(py)?),
+                msg::Instruction::Unk2() => (12, ().into_pyobject(py)?),
+                msg::Instruction::SetAllowSkip(boolean) => (13, (boolean,).into_pyobject(py)?),
+                msg::Instruction::Unk3() => (14, ().into_pyobject(py)?),
+            };
+            Ok((call.time, opcode, args.unbind()))
+        }
+        self.inner.scripts.iter().map(|(index, script)| {
+            (*index, script.iter().map(|call| call_to_python(py, call).unwrap()).collect())
+        }).collect()
+    }
+}
+
 /// A loader for Touhou files.
 #[pyclass(module = "libtouhou", subclass)]
 #[derive(Default)]
@@ -146,8 +183,8 @@
     }
 
     /// Return the given file as an io.BytesIO object.
-    fn get_file(&self, py: Python, name: String) -> PyResult<Py<PyAny>> {
-        let vec = self.get_file_internal(&name)?;
+    fn get_file(&self, py: Python, name: &str) -> PyResult<Py<PyAny>> {
+        let vec = self.get_file_internal(name)?;
         let bytes = PyBytes::new(py, &vec);
         let io = py.import("io")?;
         let bytesio_class = io.dict().get_item("BytesIO")?.unwrap();
@@ -155,11 +192,17 @@
         Ok(file.unbind())
     }
 
-    fn get_stage(&self, py: Python, name: String) -> PyResult<Py<PyStage>> {
-        let vec = self.get_file_internal(&name)?;
+    fn get_stage(&self, py: Python, name: &str) -> PyResult<Py<PyStage>> {
+        let vec = self.get_file_internal(name)?;
         let (_, inner) = stage::Stage::from_slice(&vec).unwrap();
         Ok(Py::new(py, PyStage { inner })?)
     }
+
+    fn get_msg(&self, py: Python, name: &str) -> PyResult<Py<PyMsg>> {
+        let vec = self.get_file_internal(name)?; // A malformed file raises `ValueError` below instead of panicking.
+        let (_, inner) = msg::Msg::from_slice(&vec).map_err(|err| pyo3::exceptions::PyValueError::new_err(format!("invalid msg file {name}: {:?}", err.map_input(|rest| rest.len()))))?;
+        Ok(Py::new(py, PyMsg { inner })?)
+    }
 }
 
 #[pymodule]
deleted file mode 100644
--- a/pytouhou/formats/msg.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# -*- encoding: utf-8 -*-
-##
-## Copyright (C) 2011 Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
-##
-## This program is free software; you can redistribute it and/or modify
-## it under the terms of the GNU General Public License as published
-## by the Free Software Foundation; version 3 only.
-##
-## This program is distributed in the hope that it will be useful,
-## but WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-## GNU General Public License for more details.
-##
-
-from struct import pack, unpack, calcsize
-
-from pytouhou.utils.helpers import get_logger
-
-logger = get_logger(__name__)
-
-class MSG:
-    _instructions = {0: ('', None),
-                     1: ('hh', None),
-                     2: ('hh', 'change_face'),
-                     3: ('hhs', 'display_dialog_line'),
-                     4: ('I', 'pause'),
-                     5: ('hh', 'switch'),
-                     6: ('', 'add_enemy_sprite'),
-                     7: ('I', 'change_music'),
-                     8: ('hhs', 'display_character_line'),
-                     9: ('I', 'show_scores'),
-                     10: ('', 'freeze'),
-                     11: ('', 'next_level'),
-                     12: ('', None),
-                     13: ('I', None),
-                     14: ('', None)} #TODO
-
-
-    def __init__(self):
-        self.msgs = {}
-
-
-    @classmethod
-    def read(cls, file):
-        entry_count, = unpack('<I', file.read(4))
-        entry_offsets = unpack('<%dI' % entry_count, file.read(4 * entry_count))
-
-        msg = cls()
-        msg.msgs = {}
-
-        for i, offset in enumerate(entry_offsets):
-            if msg.msgs and offset == entry_offsets[0]: # In EoSD, Reimu’s scripts start at 0, and Marisa’s ones at 10.
-                continue                                # If Reimu has less than 10 scripts, the remaining offsets are equal to her first.
-
-            msg.msgs[i] = []
-            file.seek(offset)
-
-            while True:
-                time, opcode, size = unpack('<HBB', file.read(4))
-                if time == 0 and opcode == 0:
-                    break
-                data = file.read(size)
-                if opcode in cls._instructions:
-                    fmt = '<%s' % cls._instructions[opcode][0]
-                    if fmt.endswith('s'):
-                        fmt = fmt[:-1]
-                        fmt = '%s%ds' % (fmt, size - calcsize(fmt))
-                    args = unpack(fmt, data)
-                    if fmt.endswith('s'):
-                        args = args[:-1] + (args[-1].decode('shift_jis'),)
-                else:
-                    args = (data, )
-                    logger.warning('unknown msg opcode %d', opcode)
-
-                msg.msgs[i].append((time, opcode, args))
-
-
-        return msg
-
--- a/pytouhou/resource/loader.py
+++ b/pytouhou/resource/loader.py
@@ -48,11 +48,6 @@
         return ECL.read(file) #TODO: modular
 
 
-    def get_msg(self, name):
-        file = self.get_file(name)
-        return MSG.read(file) #TODO: modular
-
-
     def get_sht(self, name):
         file = self.get_file(name)
         return SHT.read(file) #TODO: modular