diff formats/src/th06/msg.rs @ 782:a30ce01b9154

formats: Rewrite msg parsing in Rust
author Link Mauve <linkmauve@linkmauve.fr>
date Thu, 20 Nov 2025 19:02:19 +0100
parents
children
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/formats/src/th06/msg.rs
@@ -0,0 +1,165 @@
+//! MSG format support.
+
+use encoding_rs::SHIFT_JIS;
+use nom::{
+    multi::length_count,
+    number::complete::{le_u16, le_u32, le_u8},
+    IResult, Parser,
+};
+use std::collections::BTreeMap;
+
+/// Decode a NUL-terminated SHIFT_JIS byte string into a `String`.
+///
+/// Everything up to (but excluding) the first NUL byte is decoded; when `i` contains no NUL,
+/// all of it is decoded.  The whole of `i` is consumed wherever the terminator sits, so the
+/// returned remainder is always empty and this parser has to be the last one applied to a
+/// buffer.  Malformed sequences never fail the parse: the decoder substitutes U+FFFD for them.
+// The name follows nom’s `le_*` parsers so that `gen_match!` can derive it from the type name.
+#[allow(non_snake_case)]
+pub fn le_String(i: &[u8]) -> IResult<&[u8], String> {
+    // `split` always yields at least one (possibly empty) chunk, so the fallback is never hit.
+    let data = i.split(|c| *c == b'\0').next().unwrap_or(i);
+    // The text is always SHIFT_JIS: skip BOM sniffing, which could otherwise switch to UTF-8 or
+    // UTF-16 for a payload that merely happens to start with BOM-looking bytes.
+    let (string, _had_errors) = SHIFT_JIS.decode_without_bom_handling(data);
+    // Return the empty tail of `i` itself rather than an unrelated static slice, so that the
+    // remainder still points inside the caller’s buffer.
+    Ok((&i[i.len()..], string.into_owned()))
+}
+
+/// One timed instruction of a script.
+///
+/// A script is stored as a `Vec<Call>`, in the order the calls appear in the file.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Call {
+    /// Time at which this instruction will be called, as read from the 16-bit field that
+    /// starts each instruction header.
+    pub time: u16,
+
+    /// The decoded instruction, arguments included.
+    pub instr: Instruction,
+}
+
+/// Parsed contents of a MSG file: its scripts, keyed by index.
+#[derive(Debug, Clone)]
+pub struct Msg {
+    /// Scripts keyed by their position in the file’s offset table, each an ordered list of calls.
+    pub scripts: BTreeMap<u8, Vec<Call>>,
+}
+
+impl Msg {
+    /// Build a `Msg` from the raw bytes of a MSG file.
+    ///
+    /// On success the tuple holds the input slice reported as remaining by `parse_msg`,
+    /// followed by the parsed `Msg`; on failure the nom error is returned unchanged.
+    pub fn from_slice(data: &[u8]) -> IResult<&[u8], Msg> {
+        parse_msg(data)
+    }
+}
+
+/// Expand a type name into the parser for it: `gen_match!(u16)` becomes `le_u16`, and
+/// `gen_match!(String)` becomes `le_String`.
+///
+/// This relies on the `${concat(..)}` metavariable expression, which is not available on stable
+/// Rust at the time of writing; the matching feature gate is presumably enabled at the crate
+/// root (TODO confirm).
+macro_rules! gen_match {
+    ($ty:ident) => {
+        ${concat(le_, $ty)}
+    };
+}
+
+/// Generate the `Instruction` enum and its argument parser from an opcode table.
+///
+/// Each `opcode => fn Name(arg: Type, ...)` entry becomes an `Instruction::Name(Type, ...)`
+/// variant, plus a match arm in `parse_instruction_args` reading the arguments in declaration
+/// order with the `le_<Type>` parser selected by `gen_match!`.
+macro_rules! declare_msg_instructions {
+    ($($code:tt => fn $variant:ident($($arg:ident: $ty:ident),*)),*,) => {
+        /// Available instructions in a `Msg`.
+        #[allow(missing_docs)]
+        #[derive(Debug, Clone, PartialEq)]
+        pub enum Instruction {
+            $(
+                $variant($($ty),*)
+            ),*
+        }
+
+        /// Decode the arguments of the instruction designated by `opcode` from `i`, returning
+        /// the bytes left over after the last argument together with the instruction.
+        ///
+        /// An opcode missing from the table is reported as a nom `Failure`.
+        fn parse_instruction_args(mut i: &[u8], opcode: u8) -> IResult<&[u8], Instruction> {
+            match opcode {
+                $(
+                    $code => {
+                        // Read each argument in turn, advancing the cursor past it.
+                        $(
+                            let (rest, $arg) = gen_match!($ty)(i)?;
+                            i = rest;
+                        )*
+                        Ok((i, Instruction::$variant($($arg),*)))
+                    }
+                )*
+                // XXX: use a more specific error instead.
+                _ => Err(nom::Err::Failure(nom::error::Error::new(i, nom::error::ErrorKind::Eof))),
+            }
+        }
+    };
+}
+
+// Opcode table of the MSG format: `opcode => fn Variant(argument: type, ...)`.
+//
+// Arguments are read from the instruction payload in the order listed here.  A `String`
+// argument has to come last, because `le_String` consumes everything left in the payload.
+declare_msg_instructions! {
+    0 => fn Unk1(),
+    1 => fn Enter(side: u16, effect: u16),
+    2 => fn ChangeFace(side: u16, index: u16),
+    3 => fn DisplayText(side: u16, index: u16, text: String),
+    4 => fn Pause(duration: u32),
+    5 => fn Animate(side: u16, effect: u16),
+    6 => fn SpawnEnemySprite(),
+    7 => fn ChangeMusic(track: u32),
+    8 => fn DisplayDescription(side: u16, index: u16, text: String),
+    9 => fn ShowScores(unk1: u32),
+    10 => fn Freeze(),
+    11 => fn NextStage(),
+    12 => fn Unk2(),
+    13 => fn SetAllowSkip(boolean: u32),
+    14 => fn Unk3(),
+}
+
+fn parse_msg(input: &[u8]) -> IResult<&[u8], Msg> {
+    let (mut i, entry_offsets) = length_count(le_u32, le_u32).parse(input)?;
+    let first_offset = entry_offsets[0];
+
+    let mut scripts = BTreeMap::new();
+    for (index, offset) in entry_offsets
+        .into_iter()
+        .enumerate()
+        .map(|(index, offset)| (index as u8, offset))
+    {
+        if input.len() < offset as usize {
+            return Err(nom::Err::Failure(nom::error::Error::new(
+                input,
+                nom::error::ErrorKind::Eof,
+            )));
+        }
+
+        // In EoSD, Reimu’s scripts start at 0, and Marisa’s ones at 10.
+        // If Reimu has less than 10 scripts, the remaining offsets are equal to her first.
+        if index > 0 && offset == first_offset {
+            continue;
+        }
+
+        i = &input[offset as usize..];
+        let mut instructions = Vec::new();
+        loop {
+            let (i2, (time, opcode, size)) = (le_u16, le_u8, le_u8).parse(i)?;
+            if time == 0 && opcode == 0 && size == 0 {
+                break;
+            }
+            let (i2, data) = (&i2[size as usize..], &i2[..size as usize]);
+            let (empty, instr) = parse_instruction_args(data, opcode)?;
+            assert!(empty.is_empty());
+            instructions.push(Call { time, instr });
+            i = i2;
+        }
+        scripts.insert(index, instructions);
+    }
+
+    Ok((i, Msg { scripts }))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+
+    /// Parse `EoSD/ST/msg1.dat` and check the number of scripts, their lengths, and one
+    /// decoded text instruction.
+    #[test]
+    fn msg() {
+        // The path is resolved against the current directory, so name both when it is missing.
+        let path = "EoSD/ST/msg1.dat";
+        let buf = fs::read(path).unwrap_or_else(|err| {
+            let cwd = std::env::current_dir().unwrap();
+            panic!("cannot read {} from {}: {}", path, cwd.display(), err)
+        });
+
+        let (_, msg) = Msg::from_slice(&buf).unwrap();
+        assert_eq!(msg.scripts.len(), 4);
+        assert_eq!(msg.scripts[&0].len(), 89);
+        assert_eq!(msg.scripts[&1].len(), 13);
+        assert_eq!(msg.scripts[&10].len(), 58);
+        assert_eq!(msg.scripts[&11].len(), 13);
+
+        let script = &msg.scripts[&0];
+        assert_eq!(script[3].time, 60);
+        assert_eq!(
+            script[3].instr,
+            Instruction::DisplayText(0, 0, String::from("久々のお仕事だわ。"))
+        );
+    }
+}