diff fmgen/opna.c @ 0:c55ea9478c80

Hello Gensokyo!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Tue, 21 May 2013 10:29:21 +0200
parents
children
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/fmgen/opna.c
@@ -0,0 +1,1314 @@
+// FIXME: move ugly-ass legalese somewhere where it won't be seen
+// by anyone other than lawyers. (/dev/null would be ideal but sadly
+// we live in an imperfect world).
+/* Copyright (c) 2012/2013, Peter Barfuss
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met: 
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer. 
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+#include <stdint.h>
+#include <math.h>
+#include <unistd.h>
+#include "op.h"
+#include "psg.h"
+#include "opna.h"
+// Note-number lookup. SetFNum() indexes this table with bits 7..13 of the
+// block/F-number value ((f >> 7) & 127) and stores the result (0..31) in
+// op->bn, which OperatorPrepare() then uses for detune and key scaling.
+static const uint8_t notetab[128] =
+{
+     0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  3,  3,  3,  3,  3,  3,
+     4,  4,  4,  4,  4,  4,  4,  5,  6,  7,  7,  7,  7,  7,  7,  7,
+     8,  8,  8,  8,  8,  8,  8,  9, 10, 11, 11, 11, 11, 11, 11, 11,
+    12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 15, 15, 15, 15, 15, 15,
+    16, 16, 16, 16, 16, 16, 16, 17, 18, 19, 19, 19, 19, 19, 19, 19,
+    20, 20, 20, 20, 20, 20, 20, 21, 22, 23, 23, 23, 23, 23, 23, 23,
+    24, 24, 24, 24, 24, 24, 24, 25, 26, 27, 27, 27, 27, 27, 27, 27,
+    28, 28, 28, 28, 28, 28, 28, 29, 30, 31, 31, 31, 31, 31, 31, 31,
+};
+
+// Detune offsets. OperatorPrepare() reads dttab[op->detune + op->bn] and adds
+// the value to op->dp before scaling it into the phase increment.
+// Entries 128..255 are the negation of entries 0..127.
+static const int8_t dttab[256] =
+{
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  2,  2,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,
+      4,  6,  6,  6,  8,  8,  8, 10, 10, 12, 12, 14, 16, 16, 16, 16,
+      2,  2,  2,  2,  4,  4,  4,  4,  4,  6,  6,  6,  8,  8,  8, 10,
+     10, 12, 12, 14, 16, 16, 18, 20, 22, 24, 26, 28, 32, 32, 32, 32,
+      4,  4,  4,  4,  4,  6,  6,  6,  8,  8,  8, 10, 10, 12, 12, 14,
+     16, 16, 18, 20, 22, 24, 26, 28, 32, 34, 38, 40, 44, 44, 44, 44,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0, -2, -2, -2, -2, -2, -2, -2, -2, -4, -4, -4, -4,
+     -4, -6, -6, -6, -8, -8, -8,-10,-10,-12,-12,-14,-16,-16,-16,-16,
+     -2, -2, -2, -2, -4, -4, -4, -4, -4, -6, -6, -6, -8, -8, -8,-10,
+    -10,-12,-12,-14,-16,-16,-18,-20,-22,-24,-26,-28,-32,-32,-32,-32,
+     -4, -4, -4, -4, -4, -6, -6, -6, -8, -8, -8,-10,-10,-12,-12,-14,
+    -16,-16,-18,-20,-22,-24,-26,-28,-32,-34,-38,-40,-44,-44,-44,-44,
+};
+
+// Linear gain per total-level step. Ch4Calc()/Ch4CalcL() index this table with
+// the operator's tl field and multiply the result into the envelope output.
+// The values start at 0xff and halve every 8 entries (0x80 at index 8, 0x40 at
+// index 16, ...), ending at 0x01.
+// NOTE(review): only 64 entries are provided, while OperatorReset() sets tl to
+// 127 - confirm how tl is range-limited before it is used as an index.
+static uint8_t gaintab[64] = {
+    0xff, 0xea, 0xd7, 0xc5, 0xb5, 0xa6, 0x98, 0x8b, 0x80, 0x75, 0x6c, 0x63, 0x5a, 0x53, 0x4c, 0x46,
+    0x40, 0x3b, 0x36, 0x31, 0x2d, 0x2a, 0x26, 0x23, 0x20, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x12,
+    0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x0a, 0x09, 0x08, 0x07, 0x07, 0x06, 0x06, 0x05, 0x05, 0x04,
+    0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01,
+};
+
+// Half-period sine table: entry i is 256 * sinf(M_PI*(2*i+1)/1024.0f), rounded
+// to the nearest integer, for i=0,...,511 (so values run 0x1 .. 0x100 .. 0x1).
+// It is read through the Sine() macro, which supplies the sign for the second
+// half of the period.
+// Should make this twice as large (so a duplicate of the top 512, but with the other half of the
+// interval [0,2*M_PI], therefore the negative of the first half), and then get rid of the
+// sign hack in Sine(). However, I'm not actually sure which will use fewer gates on an FPGA,
+// and there's really no speed difference on any machine newer than a 6502, probably.
+static uint16_t sinetable[512] = {
+    0x1, 0x2, 0x4, 0x5, 0x7, 0x9, 0xa, 0xc, 0xd, 0xf, 0x10, 0x12, 0x14, 0x15, 0x17, 0x18, 
+    0x1a, 0x1b, 0x1d, 0x1f, 0x20, 0x22, 0x23, 0x25, 0x26, 0x28, 0x29, 0x2b, 0x2d, 0x2e, 0x30, 0x31, 
+    0x33, 0x34, 0x36, 0x37, 0x39, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42, 0x44, 0x45, 0x47, 0x48, 0x4a, 
+    0x4b, 0x4d, 0x4e, 0x50, 0x51, 0x53, 0x54, 0x56, 0x57, 0x58, 0x5a, 0x5b, 0x5d, 0x5e, 0x60, 0x61, 
+    0x63, 0x64, 0x66, 0x67, 0x68, 0x6a, 0x6b, 0x6d, 0x6e, 0x70, 0x71, 0x72, 0x74, 0x75, 0x77, 0x78, 
+    0x79, 0x7b, 0x7c, 0x7d, 0x7f, 0x80, 0x82, 0x83, 0x84, 0x86, 0x87, 0x88, 0x8a, 0x8b, 0x8c, 0x8e, 
+    0x8f, 0x90, 0x91, 0x93, 0x94, 0x95, 0x97, 0x98, 0x99, 0x9a, 0x9c, 0x9d, 0x9e, 0x9f, 0xa1, 0xa2, 
+    0xa3, 0xa4, 0xa5, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 
+    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 
+    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xde, 0xdf, 0xe0, 0xe1, 0xe1, 
+    0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe6, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xec, 
+    0xed, 0xed, 0xee, 0xef, 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 
+    0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xf9, 0xfa, 0xfa, 0xfa, 0xfb, 0xfb, 
+    0xfb, 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xff, 
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 
+    0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
+    0xff, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfd, 0xfd, 0xfd, 0xfd, 0xfc, 0xfc, 0xfc, 0xfc, 0xfb, 
+    0xfb, 0xfb, 0xfa, 0xfa, 0xfa, 0xf9, 0xf9, 0xf9, 0xf8, 0xf8, 0xf7, 0xf7, 0xf7, 0xf6, 0xf6, 0xf5, 
+    0xf5, 0xf4, 0xf4, 0xf3, 0xf3, 0xf2, 0xf2, 0xf1, 0xf1, 0xf0, 0xf0, 0xef, 0xef, 0xee, 0xed, 0xed, 
+    0xec, 0xec, 0xeb, 0xea, 0xea, 0xe9, 0xe8, 0xe8, 0xe7, 0xe6, 0xe6, 0xe5, 0xe4, 0xe4, 0xe3, 0xe2, 
+    0xe1, 0xe1, 0xe0, 0xdf, 0xde, 0xde, 0xdd, 0xdc, 0xdb, 0xda, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 
+    0xd4, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 
+    0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 
+    0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa5, 0xa4, 0xa3, 
+    0xa2, 0xa1, 0x9f, 0x9e, 0x9d, 0x9c, 0x9a, 0x99, 0x98, 0x97, 0x95, 0x94, 0x93, 0x91, 0x90, 0x8f, 
+    0x8e, 0x8c, 0x8b, 0x8a, 0x88, 0x87, 0x86, 0x84, 0x83, 0x82, 0x80, 0x7f, 0x7d, 0x7c, 0x7b, 0x79, 
+    0x78, 0x77, 0x75, 0x74, 0x72, 0x71, 0x70, 0x6e, 0x6d, 0x6b, 0x6a, 0x68, 0x67, 0x66, 0x64, 0x63, 
+    0x61, 0x60, 0x5e, 0x5d, 0x5b, 0x5a, 0x58, 0x57, 0x56, 0x54, 0x53, 0x51, 0x50, 0x4e, 0x4d, 0x4b, 
+    0x4a, 0x48, 0x47, 0x45, 0x44, 0x42, 0x40, 0x3f, 0x3d, 0x3c, 0x3a, 0x39, 0x37, 0x36, 0x34, 0x33, 
+    0x31, 0x30, 0x2e, 0x2d, 0x2b, 0x29, 0x28, 0x26, 0x25, 0x23, 0x22, 0x20, 0x1f, 0x1d, 0x1b, 0x1a, 
+    0x18, 0x17, 0x15, 0x14, 0x12, 0x10, 0xf, 0xd, 0xc, 0xa, 0x9, 0x7, 0x6, 0x4, 0x2, 0x1, 
+};
+
+// Presumably set once the runtime tables below have been built; the code that
+// reads or writes this flag is not in this part of the file - TODO confirm.
+static uint8_t tablemade = false;
+// Log-to-linear conversion table, read through LogToLin().
+static uint8_t cltab[512];
+static uint32_t tltab[FM_TLENTS];
+// LFO counter increments, one per LFO frequency setting; filled in by
+// SetPrescaler() and selected with (reg22 & 7) in OPNASetRate().
+static uint32_t lfotab[8];
+static const uint8_t fbtab[8] = { 31, 7, 6, 5, 4, 3, 2, 1 };
+
+/* Amplitude/Phase modulation tables. */
+static const float pms[8] = { 0, 1/720., 2/720., 3/720.,  4/720.,  6/720., 12/720.,  24/720. };    // OPNA
+// Right-shift applied to aml in Ch4CalcL(), selected by the operator's ams
+// field (a shift of 29 reduces the 8-bit aml to 0, i.e. no tremolo).
+static const uint8_t amt[4] = { 29, 4, 2, 1 }; // OPNA
+// One row per PM sensitivity: Ch4Prepare() selects the row with (ms & 7) and
+// Ch4CalcL() indexes it with pml.
+static int     pmtable[8][FM_LFOENTS];
+// Current LFO outputs, updated by LFO(): aml is the amplitude-modulation level,
+// pml the index into the channel's PM table row.
+uint8_t aml, pml;
+// PM value for the channel currently being clocked; set by Ch4CalcL() and
+// consumed by PGCalcL().
+int     pmv;
+
+// ---------------------------------------------------------------------------
+// Advance the chip LFO by one step and refresh the global pml/aml outputs.
+// Both outputs are triangle waves derived from the top 8 bits of the LFO
+// counter (the bits above FM_LFOCBITS).
+static inline void LFO(OPNA *opna)
+{
+    const uint8_t step = (opna->lfocount >> FM_LFOCBITS) & 0xff;
+
+    opna->lfocount += opna->lfodcount;
+
+    /* Phase modulation: 0x80 rising to 0xfe, 0xff falling to 0x01, then
+     * 0x00 rising to 0x7e over the four quarters of the cycle. */
+    if (step >= 0xc0) {
+        pml = (step - 0xc0) * 2;
+    } else if (step >= 0x40) {
+        pml = 0x7f - (step - 0x40) * 2 + 0x80;
+    } else {
+        pml = step * 2 + 0x80;
+    }
+
+    /* Amplitude modulation: ramps up during the first half of the cycle,
+     * back down (bitwise complement) during the second half. */
+    aml = (step < 0x80) ? (step << 1) : ~(step << 1);
+}
+
+// ---------------------------------------------------------------------------
+// Magic. No, really.
+// In reality this just initialises some tables used by everything else,
+// that are dependent on both the chip clock and the "DAC" samplerate.
+// The hilarious thing though is that this is really the only place where
+// the chip clock value gets actually *used*, and even then it's indirectly
+// via the ratio parameter.
+//
+// Ratio the tables below were last built for; ~0 forces the first build.
+static uint32_t currentratio = ~0;
+// Phase-generator scale factor derived from the ratio (see OperatorPrepare()).
+static float rr;
+// Envelope-generator step size for each of the 64 rates (see SetEGRate()).
+static uint32_t ratetable[64];
+
+// Rebuild rr and ratetable[] for a new clock/samplerate ratio.
+// Does nothing when the ratio has not changed since the previous call.
+static void MakeTimeTable(uint32_t ratio)
+{
+    int idx;
+
+    if (ratio == currentratio)
+        return;
+    currentratio = ratio;
+
+    // PG part
+    rr = (float)ratio / (1 << (2 + FM_RATIOBITS - FM_PGBITS));
+
+    // EG part: rates 0..3 never advance the envelope.
+    for (idx = 0; idx < 4; idx++)
+        ratetable[idx] = 0;
+
+    for (idx = 4; idx < 64; idx++)
+    {
+        int h = idx >> 2;
+        // The top group (h == 15) uses a fixed multiplier of 8; all other
+        // groups use 4..7 depending on the low two bits of the rate.
+        int mult = (h == 15) ? 8 : (idx & 3) + 4;
+        ratetable[idx] =
+            ((ratio << (FM_EGBITS - 3 - FM_RATIOBITS)) << Min(h, 11)) * mult;
+    }
+
+    // Rates 5 and 7 duplicate rates 4 and 6.
+    ratetable[5] = ratetable[4];
+    ratetable[7] = ratetable[6];
+}
+
+// Load the envelope-generator timing for rate r (an index into ratetable[],
+// so r must be 0..63): the per-sample step, the attack shift and the
+// decay/release increment.
+static void SetEGRate(FMOperator *op, uint r)
+{
+    uint group = r >> 2;
+
+    op->egstepd = ratetable[r];
+    // Attack shift is limited to the range 1..4; the non-attack increment is
+    // derived from it.
+    op->egtransa = Limit(15 - group, 4, 1);
+    op->egtransd = 16 >> op->egtransa;
+}
+
+// Standard operator init routine. Zeros out some more stuff
+// than OperatorReset() does, then calls OperatorReset().
+//
+// Bring an operator to its power-on state: clear the rate, modulation and
+// key state that OperatorReset() leaves alone, then run OperatorReset().
+void OperatorInit(FMOperator *op)
+{
+    // PG part
+    op->detune = 0;
+    op->multiple = 0;
+
+    // LFO
+    op->ms = 0;
+    op->ams = 0;
+
+    // EG part
+    op->ksr = 0;
+    op->rr = 0;
+    op->sr = 0;
+    op->dr = 0;
+    op->ar = 0;
+    op->keyon = false;
+    op->mute = false;
+
+    OperatorReset(op);
+}
+
+// Standard operator reset routine. Init EG/PG to defaults,
+// clear any stored samples, then force a reinit of EG/PG
+// in OperatorPrepare() below by setting paramchanged to 1.
+//
+// Put the operator's envelope and phase generators back into the "off"
+// state, drop any stored output samples, and flag the operator so that the
+// next OperatorPrepare() recomputes its derived parameters.
+void OperatorReset(FMOperator *op)
+{
+    // OP part
+    op->out2 = 0;
+    op->out = 0;
+
+    // PG part
+    op->pgcount = 0;
+
+    // EG part
+    op->tll = 127;
+    op->tl = 127;
+    op->eglevel = 0xff;
+    op->eglvnext = 0x100;
+    op->egstep = 0;
+    op->phase = off;
+    SetEGRate(op, 0);
+
+    op->paramchanged = true;
+}
+
+// Init EG, PG.
+// PG init is trivial, simply set pgdcount (phase counter increment)
+// based on multiple, detune and bn.
+// See Pages 24-26 of the OPNA manual for details.
+// EG init is your standard ADSR state machine. Should (hopefully!)
+// be self-explanatory, especially if you've ever seen a software implementation
+// of ADSR before (seriously, they're all the damn same).
+//
+// Recompute the operator's derived state if any of its parameters changed
+// since the last call: the phase increment (from dp, detune, bn and the
+// multiplier), the key-scale rate, the EG rate for the current envelope
+// phase, and the tremolo sensitivity. Clears paramchanged.
+void OperatorPrepare(FMOperator *op)
+{
+    uint8_t mul;
+    uint rate;
+
+    if (!op->paramchanged)
+        return;
+    op->paramchanged = false;
+
+    // PG part: a multiple of 0 counts as 1/2, hence the doubled scale.
+    mul = ((op->multiple) ? 2*op->multiple : 1);
+    op->pgdcount = (op->dp + dttab[op->detune + op->bn]) * (uint32_t)(mul * rr);
+    op->pgdcountl = op->pgdcount >> 11;
+
+    // EG part
+    op->ksr = op->bn >> (3-op->ks);
+
+    switch (op->phase)
+    {
+    case attack:
+        rate = op->ar ? Min(63, op->ar+op->ksr) : 0;
+        SetEGRate(op, rate);
+        break;
+    case decay:
+        rate = op->dr ? Min(63, op->dr+op->ksr) : 0;
+        SetEGRate(op, rate);
+        op->eglvnext = op->sl * 8;
+        break;
+    case sustain:
+        rate = op->sr ? Min(63, op->sr+op->ksr) : 0;
+        SetEGRate(op, rate);
+        break;
+    case release:
+        rate = Min(63, op->rr+op->ksr);
+        SetEGRate(op, rate);
+        break;
+    case next: // temporal
+    case off:  // temporal
+        break;
+    }
+
+    // LFO: tremolo sensitivity only applies when AM is enabled.
+    op->ams = (op->amon ? (op->ms >> 4) & 3 : 0);
+}
+
+// FIXME: Rename. "Phase" here refers to ADSR DFA state,
+// not PG/sine table phase. Also, yeah, this does the
+// ADSR DFA state transitions.
+//
+// Move the operator's envelope state machine to nextphase.
+// Several cases deliberately fall through: a phase that cannot be entered
+// (attack too fast, sustain level of 0, nothing left to release) hands over
+// to the following one.
+static void ShiftPhase(FMOperator *op, EGPhase nextphase)
+{
+    switch (nextphase)
+    {
+    case attack:        // Attack Phase
+        op->tl = op->tll;   // latch the pending total level
+        if ((op->ar+op->ksr) < 62) {
+            SetEGRate(op, op->ar ? Min(63, op->ar+op->ksr) : 0);
+            op->phase = attack;
+            break;
+        }
+        /* fallthrough: effective attack rate >= 62 skips the attack phase */
+    case decay:         // Decay Phase
+        if (op->sl) {
+            op->eglevel = 0;
+            op->eglvnext = op->sl*8;
+            SetEGRate(op, op->dr ? Min(63, op->dr+op->ksr) : 0);
+            op->phase = decay;
+            break;
+        }
+        /* fallthrough: sl == 0 means there is nothing to decay to */
+    case sustain:       // Sustain Phase
+        op->eglevel = op->sl*8;
+        op->eglvnext = 0x100;
+        SetEGRate(op, op->sr ? Min(63, op->sr+op->ksr) : 0);
+        op->phase = sustain;
+        break;
+
+    case release:       // Release Phase
+        if (op->phase == attack || (op->eglevel < 0x100/* && phase != off*/)) {
+            op->eglvnext = 0x100;
+            SetEGRate(op, Min(63, op->rr+op->ksr));
+            op->phase = release;
+            break;
+        }
+        /* fallthrough: eglevel already at 0x100 or above, switch off */
+    case off:           // off
+    default:
+        op->eglevel = 0xff;
+        op->eglvnext = 0x100;
+        SetEGRate(op, 0);
+        op->phase = off;
+        break;
+    }
+}
+
+//  Block/F-Num
+// Decode a combined block/F-number value: bits 0..10 are the F-number,
+// bits 11..13 the block. Stores the resulting phase delta and note number
+// and marks the operator for re-preparation.
+static inline void SetFNum(FMOperator *op, uint f)
+{
+    uint fnum = f & 2047;
+    uint block = (f >> 11) & 7;
+    uint note = (f >> 7) & 127;
+
+    op->dp = fnum << block;
+    op->bn = notetab[note];
+    op->paramchanged = true;
+}
+
+// Clock the EG for one operator.
+// Essentially just a call to ShiftPhase,
+// but decrements the output EG level if starting
+// from the attack phase, otherwise incrementing it.
+// Should probably integrate the special case for attack
+// from ShiftPhase() directly into here at some point.
+// Advance one operator's envelope by a single EG step.
+// During attack the level moves towards 0 and the decay phase is entered once
+// it gets there; in every other phase the level rises by egtransd and the
+// state machine advances when it reaches eglvnext.
+void EGCalc(FMOperator *op)
+{
+    op->egstep += 3L << (11 + FM_EGBITS);
+
+    if (op->phase != attack)
+    {
+        op->eglevel += op->egtransd;
+        if (op->eglevel >= op->eglvnext)
+            ShiftPhase(op, (EGPhase)(op->phase+1));
+        return;
+    }
+
+    op->eglevel -= 1 + (op->eglevel >> op->egtransa);
+    if (op->eglevel <= 0)
+        ShiftPhase(op, decay);
+}
+
+// KeyOn, hopefully obvious.
+// Key the operator on. An operator that is already keyed on is left alone;
+// the envelope, stored samples and phase counter are only restarted when the
+// operator is currently off or releasing.
+static void KeyOn(FMOperator *op)
+{
+    if (op->keyon)
+        return;
+    op->keyon = true;
+
+    if (op->phase != off && op->phase != release)
+        return;
+
+    ShiftPhase(op, attack);
+    op->out2 = 0;
+    op->out = 0;
+    op->pgcount = 0;
+}
+
+// KeyOff, hopefully obvious.
+// Key the operator off: if it is currently keyed on, clear the flag and send
+// the envelope to the release phase.
+static void KeyOff(FMOperator *op)
+{
+    if (!op->keyon)
+        return;
+
+    op->keyon = false;
+    ShiftPhase(op, release);
+}
+
+// PG uses 9 bits, with the table itself using another 10 bits.
+// The top bits are the actually relevant ones, given that the PG increment will basically set
+// the lowest few bits to nonsense.
+// The hack there that checks for bit 10 in the right place and if yes, does some strange xor magic
+// makes the value of Sine() negative if we're in the top half of the [0,2*M_PI] interval.
+// It is, of course, one/two's complement specific, but I have yet to hear of an integer arithmetic implementation
+// on any modern machine that isn't at least one of those two. (In fact, I think they're all two's complement, even).
+// Look up the sine value for phase s: the table index comes from the bits of s
+// above (20+FM_PGBITS-FM_OPSINBITS), and bit 28 of s selects the XOR mask that
+// flips the sign for the second half of the period.
+// The whole expansion is parenthesized so the macro can be used safely inside
+// a larger expression ('^' binds more loosely than most other operators).
+#define Sine(s) (sinetable[((s) >> (20+FM_PGBITS-FM_OPSINBITS))&(FM_OPSINENTS/2-1)]^(-(((s) & 0x10000000) >> 27)))
+//#define LogToLin(x) cltab[x]
+
+// Convert a logarithmic envelope level to a linear amplitude via cltab[].
+// Levels beyond the end of the table are treated as silence and return 0.
+// The limit is taken from the array itself: the previous hard-coded limit of
+// 1024 was larger than the table, so some levels read past its end.
+static inline uint32_t LogToLin(uint32_t x) {
+    if (x >= sizeof cltab / sizeof cltab[0]) {
+        return 0;
+    }
+    return cltab[x];
+}
+
+//  PG clock routine.
+//  Does this really need to be in its own function anymore?
+//  It's literally just a trivial increment of a counter now, nothing more.
+//  Its output, btw, is 2^(20+PGBITS) / cycle, with PGBITS=9 in this implementation.
+// Clock the phase generator once.
+// Returns the phase counter as it was before the increment.
+static inline uint32_t PGCalc(FMOperator *op)
+{
+    const uint32_t before = op->pgcount;
+
+    op->pgcount += op->pgdcount;
+    return before;
+}
+
+// Same as above, but with PM if enabled.
+// Same comments as above apply.
+// Clock the phase generator once with phase modulation applied: the increment
+// is offset by pgdcountl scaled by the global pmv, shifted right by 5.
+// Returns the phase counter as it was before the increment.
+static inline uint32_t PGCalcL(FMOperator *op)
+{
+    const uint32_t before = op->pgcount;
+
+    op->pgcount += op->pgdcount + ((op->pgdcountl * pmv) >> 5);
+    return before;
+}
+
+// Clock one FM operator. Does a lookup in the sine table
+// for the waveform to output, possibly frequency-modulating
+// that with the contents of in, then clocks the Phase Generator
+// for that operator, stores the output sample and returns.
+// Should probably integrate PGCalc() into this function,
+// at some point at least.
+// Clock one operator without LFO.
+//   in - modulator input, added to the phase after a left shift by 7.
+// Looks up the waveform for the current phase, advances the phase generator,
+// and stores and returns the sample scaled by the envelope output.
+static inline int32_t Calc(FMOperator *op, int32_t in)
+{
+    const int32_t wave = Sine(op->pgcount + (in << 7));
+
+    (void)PGCalc(op);
+    return op->out = op->egout*wave;
+}
+
+// Version of the above that gets used when the chip-internal LFO
+// is enabled. Basically identical to the above, except with more Vibrato.
+// Clock one operator with the LFO active: same as Calc(), except that the
+// phase generator is advanced with PGCalcL() so phase modulation applies.
+//   in - modulator input, added to the phase after a left shift by 7.
+// Stores and returns the sample scaled by the envelope output.
+static inline int32_t CalcL(FMOperator *op, int32_t in)
+{
+    const int32_t wave = Sine(op->pgcount + (in << 7));
+
+    (void)PGCalcL(op);
+    return op->out = op->egout*wave;
+}
+
+// Clock operator 0. OP0 is special as it does not take an input from
+// another operator, rather it can frequency-modulate itself via the
+// fb parameter (which specifies feedback amount). This is incredibly
+// useful, and makes it possible to define a lot more instruments
+// for the OPNA than you'd be able to otherwise.
+#define FM_PRECISEFEEDBACK 1
+// Clock the feedback operator without LFO.
+//   fb - feedback shift; with FM_PRECISEFEEDBACK set, 31 means "no feedback".
+// The modulation input is the sum of the operator's two previous outputs.
+// The new sample is stored in op->out (previous one moves to op->out2).
+static inline void CalcFB(FMOperator *op, uint fb)
+{
+    const int32_t sum = op->out + op->out2;
+    int32_t wave;
+
+    op->out2 = op->out;
+
+    if (!FM_PRECISEFEEDBACK || fb != 31)
+        wave = Sine(op->pgcount + ((sum << 6) >> fb));
+    else
+        wave = Sine(op->pgcount);
+
+    (void)PGCalc(op);
+    op->out = op->egout*wave;
+}
+
+// Version of the above, but with 100% as much LFO as the above.
+// See comment above CalcL() for details/explanation.
+// Clock the feedback operator with the LFO active: same as CalcFB(), except
+// that the phase generator is advanced with PGCalcL().
+//   fb - feedback shift; with FM_PRECISEFEEDBACK set, 31 means "no feedback".
+static inline void CalcFBL(FMOperator *op, uint fb)
+{
+    const int32_t sum = op->out + op->out2;
+    int32_t wave;
+
+    op->out2 = op->out;
+
+    if (!FM_PRECISEFEEDBACK || fb != 31)
+        wave = Sine(op->pgcount + ((sum << 6) >> fb));
+    else
+        wave = Sine(op->pgcount);
+
+    (void)PGCalcL(op);
+    op->out = op->egout*wave;
+}
+
+// ---------------------------------------------------------------------------
+//  4-op Channel
+//  Sets the "algorithm", i.e. the connections between individual operators
+//  in a channel. See Page 22 of the manual for pretty drawings of all of the
+//  different algorithms supported by the OPNA.
+//
+// Select the operator-connection algorithm (algo must be 0..7) for a channel.
+// Each table row holds three (input, output) buffer-index pairs; they are
+// stored in ch4->idx as the three inputs followed by the three outputs.
+// The feedback operator's stored samples are cleared as well.
+static void SetAlgorithm(Channel4 *ch4, uint algo)
+{
+    static const uint8_t routing[8][6] =
+    {
+        { 0, 1, 1, 2, 2, 3 },   { 1, 0, 0, 1, 1, 2 },
+        { 1, 1, 1, 0, 0, 2 },   { 0, 1, 2, 1, 1, 2 },
+        { 0, 1, 2, 2, 2, 1 },   { 0, 1, 0, 1, 0, 1 },
+        { 0, 1, 2, 1, 2, 1 },   { 1, 0, 1, 0, 1, 0 },
+    };
+    const uint8_t *row = routing[algo];
+    int k;
+
+    for (k = 0; k < 3; k++) {
+        ch4->idx[k]     = row[2*k];     // in[k]
+        ch4->idx[k + 3] = row[2*k + 1]; // out[k]
+    }
+
+    ch4->op[0].out = 0;
+    ch4->op[0].out2 = 0;
+}
+
+// Initialise a channel: power-on state for all four operators, algorithm 0,
+// and the first PM table row.
+static inline void Ch4Init(Channel4 *ch4)
+{
+    int slot = 0;
+
+    while (slot < 4) {
+        OperatorInit(&ch4->op[slot]);
+        slot++;
+    }
+    SetAlgorithm(ch4, 0);
+    ch4->pms = pmtable[0];
+}
+
+// Reinit all operators on a given channel if paramchanged=true
+// for that channel, set the PM table for that channel, then determine
+// if there is any output from this channel, based on:
+// - mute state of each operator
+// - keyon state of each operator
+// - AM (Tremolo) enable for each operator.
+// Bit 0 of the return value is set if there is any output,
+// Bit 1 is set if tremolo is enabled for any of the operators on this
+// channel.
+// Prepare all four operators of a channel and select its PM table row.
+// Returns 0 when every operator is muted; otherwise a bit mask:
+//   bit 0 - at least one operator reports IsOn()
+//   bit 1 - the LFO affects this channel (operator 0's ms has bits set under
+//           mask 0x37 when any operator has AM enabled, under mask 7 otherwise)
+static inline int Ch4Prepare(Channel4 *ch4)
+{
+    int slot;
+    int flags = 0;
+
+    for (slot = 0; slot < 4; slot++)
+        OperatorPrepare(&ch4->op[slot]);
+
+    ch4->pms = pmtable[ch4->op[0].ms & 7];
+    if(ch4->op[0].mute && ch4->op[1].mute && ch4->op[2].mute && ch4->op[3].mute) return 0;
+
+    if (IsOn(&ch4->op[0]) | IsOn(&ch4->op[1]) | IsOn(&ch4->op[2]) | IsOn(&ch4->op[3]))
+        flags |= 1;
+
+    if (ch4->op[0].ms & (ch4->op[0].amon | ch4->op[1].amon | ch4->op[2].amon | ch4->op[3].amon ? 0x37 : 7))
+        flags |= 2;
+
+    return flags;
+}
+
+// Clock one channel. Clocks all the Envelope Generators in parallel
+// (well, okay, in sequence, but a hardware implementation *should*
+//  clock them in parallel as they are completely independent tasks,
+//  all that is important is that you don't execute Calc{L,FB,FBL}
+//  until all of the EGs are done clocking - but that should be, again,
+//  straightforward to implement in hardware).
+//
+// Clock one channel without LFO and return its output sample.
+// First every operator's envelope is advanced and its linear output level
+// computed; then the four operators are clocked in order, passing samples
+// through ch4->buf[] according to the routing set up by SetAlgorithm().
+// The returned value is the selected buffer plus operator 3's previous
+// output, shifted right by 8.
+int32_t Ch4Calc(Channel4 *ch4)
+{
+    int i;
+    ch4->buf[1] = ch4->buf[2] = ch4->buf[3] = 0;
+    for(i=0; i<4; i++) {
+        FMOperator *op = &ch4->op[i];
+        uint8_t gain;
+
+        if ((op->egstep -= op->egstepd) < 0)
+            EGCalc(op);
+        // gaintab[] has fewer entries than tl can take (OperatorReset() sets
+        // tl to 127), so never index past its end; the table has already
+        // fallen to 0x01 by its last entry, so larger tl values are treated as
+        // silence.
+        gain = (op->tl < sizeof gaintab / sizeof gaintab[0]) ? gaintab[op->tl] : 0;
+        op->egout = LogToLin(op->eglevel)*gain;
+    }
+
+    ch4->buf[0] = ch4->op[0].out; CalcFB(&ch4->op[0], ch4->fb);
+    ch4->buf[ch4->idx[3]] += Calc(&ch4->op[1], ch4->buf[ch4->idx[0]]);
+    ch4->buf[ch4->idx[4]] += Calc(&ch4->op[2], ch4->buf[ch4->idx[1]]);
+    // Operator 3 contributes its previous sample; the new one is kept in
+    // op[3].out for the next call.
+    int o = ch4->op[3].out;
+    Calc(&ch4->op[3], ch4->buf[ch4->idx[2]]);
+    return ((ch4->buf[ch4->idx[5]] + o) >> 8);
+}
+
+// Same as above, but with LFO. Should see if I can merge the two somehow and just set
+// a flag whenever I want to mix in Vibrato/Tremolo effects. Also, this code is basically
+// completely untested, due to the surprising difficulty of finding test samples that
+// actually use the chip-internal LFO. (And if you've somehow found one of those,
+// now try *also* finding a good-quality recording of it being played back on the chip
+// itself. I should go ask the folks at soundshock.se or something, come to think of it).
+//
+// Clock one channel with the LFO active and return its output sample.
+// Same structure as Ch4Calc(), with two additions: the global pmv is loaded
+// from this channel's PM table row (indexed by pml) for PGCalcL(), and each
+// operator's envelope level is offset by aml shifted right by amt[ams].
+int32_t Ch4CalcL(Channel4 *ch4)
+{
+    int i;
+    pmv = ch4->pms[pml];
+    ch4->buf[1] = ch4->buf[2] = ch4->buf[3] = 0;
+    for(i=0; i<4; i++) {
+        FMOperator *op = &ch4->op[i];
+        uint8_t gain;
+
+        if ((op->egstep -= op->egstepd) < 0)
+            EGCalc(op);
+        // gaintab[] has fewer entries than tl can take (OperatorReset() sets
+        // tl to 127), so never index past its end; the table has already
+        // fallen to 0x01 by its last entry, so larger tl values are treated as
+        // silence.
+        gain = (op->tl < sizeof gaintab / sizeof gaintab[0]) ? gaintab[op->tl] : 0;
+        op->egout = (LogToLin(op->eglevel + (aml >> amt[op->ams]))*gain);
+    }
+
+    ch4->buf[0] = ch4->op[0].out; CalcFBL(&ch4->op[0], ch4->fb);
+    ch4->buf[ch4->idx[3]] += CalcL(&ch4->op[1], ch4->buf[ch4->idx[0]]);
+    ch4->buf[ch4->idx[4]] += CalcL(&ch4->op[2], ch4->buf[ch4->idx[1]]);
+    // Operator 3 contributes its previous sample; the new one is kept in
+    // op[3].out for the next call.
+    int o = ch4->op[3].out;
+    CalcL(&ch4->op[3], ch4->buf[ch4->idx[2]]);
+    return ((ch4->buf[ch4->idx[5]] + o) >> 8);
+}
+
+// This essentially initializes a couple constant tables
+// and chip-specific parameters based on what the chip clock and "DAC" samplerate
+// were set to in OPNAInit(). psgrate is always equal to the user-requested samplerate,
+// whereas rate is only equal to that in the interpolation=false case, otherwise
+// it's set to whatever value is needed to downsample 55466Hz to the user-requested
+// samplerate, which will (almost?) always be either 44100Hz or 48000Hz.
+// TODO: better-quality resampling may be of use here, possibly.
+//
+// Apply prescaler setting p (an index into table[], so it must be 0..2) and
+// recompute everything that depends on it: the FM sample rate (and the
+// resampling ratio when interpolation is on), the timer step, the EG/PG time
+// tables, the PSG clock and the LFO increment table.
+// Does nothing when p equals the current opna->prescale; RebuildTimeTable()
+// relies on that check to force a rebuild.
+static void SetPrescaler(OPNA *opna, uint32_t p)
+{
+    // Clock dividers per prescaler setting: [0] for the FM part (divided by a
+    // further 12 below), [1] for the PSG.
+    static const char table[3][2] = { { 6, 4 }, { 3, 2 }, { 2, 1 } };
+    // Divisors for the eight LFO frequency settings.
+    static const uint8_t table2[8] = { 109,  78,  72,  68,  63,  45,  9,  6 };
+    // 512
+    if (opna->prescale != p)
+    {
+        opna->prescale = p;
+        uint32_t i, fmclock = opna->clock / table[p][0] / 12;
+
+        if (opna->interpolation) {
+            // Halve the internal rate (starting from fmclock) until
+            // psgrate/rate, in 1/16384 units, exceeds 8192 (i.e. 0.5).
+            opna->rate = fmclock * 2;
+            do {
+                opna->rate >>= 1;
+                opna->mpratio = opna->psgrate * 16384 / opna->rate;
+            } while (opna->mpratio <= 8192);
+        } else {
+            opna->rate = opna->psgrate;
+        }
+        // fmclock / rate in FM_RATIOBITS fixed point, rounded to nearest.
+        uint32_t ratio = ((fmclock << FM_RATIOBITS) + opna->rate/2) / opna->rate;
+        // Microseconds per FM clock tick, in 16.16 fixed point.
+        opna->timer_step = (int32_t)(1000000.0f * 65536.0f/fmclock);
+        MakeTimeTable(ratio);
+        PSGSetClock(&opna->psg, opna->clock / table[p][1], opna->psgrate);
+
+        for (i=0; i<8; i++) {
+            lfotab[i] = (ratio << (1+FM_LFOCBITS-FM_RATIOBITS)) / table2[i];
+        }
+    }
+}
+
+// Force SetPrescaler() to recompute all rate-dependent tables for the current
+// prescaler setting, by invalidating the stored value before re-applying it.
+static inline void RebuildTimeTable(OPNA *opna)
+{
+    const int saved = opna->prescale;
+
+    opna->prescale = -1;    // never equal to a real setting
+    SetPrescaler(opna, saved);
+}
+
+// Set volume. Just a dB->internal linear scale conversion here, nothing more.
+//
+// Set the FM output volume.
+//   db - requested level; capped at 20, and anything at or below -192 mutes.
+// The stored linear gain is 16384 * 10^(db/40), rounded to an integer.
+void SetVolumeFM(OPNA *opna, int db)
+{
+    const int capped = Min(db, 20);
+
+    if (capped <= -192) {
+        opna->fmvolume = 0;
+        return;
+    }
+    opna->fmvolume = lrintf(16384.0f * expf((float)M_LN10*(capped / 40.0f)));
+}
+
+// Chip-internal TimerA() handler. All it does is implement CSM, i.e.
+// channel 3 will get keyed on and off whenever the TimerA() interrupt fires.
+// To the best of my knowledge, CSM was intended to be used to implement
+// primitive formant synthesis (which Yamaha later repackaged in a much more
+// elaborate and featured implementation in their FS1R), and used by
+// approximately nobody. It's also been removed from the YMF288/OPN3.
+//
+// Timer A expiry handler. When bit 7 of the timer-control register is set
+// (CSM mode), all four operators of the CSM channel are keyed on and then
+// immediately keyed off again.
+static void TimerA(OPNA *opna)
+{
+    int slot;
+
+    if (!(opna->regtc & 0x80))
+        return;
+
+    for (slot = 0; slot < 4; slot++)
+        KeyOn(&opna->csmch->op[slot]);
+    for (slot = 0; slot < 4; slot++)
+        KeyOff(&opna->csmch->op[slot]);
+}
+
+// ---------------------------------------------------------------------------
+// Clock timers. TimerA has a resolution of 9 microseconds (assuming standard
+// chip clockspeed of 8MHz, which all of this code of course does), and
+// on the Speak Board for the PC-9801 is used only for the purpose of sound effects.
+// TimerB, on the other hand, has a resolution of 144 microseconds, and is basically
+// used as the main chip clock. Also, binding "sound-effects" to TimerB (needed as
+// ZUN uses the sound-effects feature to implement PSG percussion) results in tiny
+// changes to the output file, precisely none of them audible, making TimerA
+// all but useless in this case. Note that TimerA is also used internally in the chip
+// to implement CSM-mode (see comment above).
+//
+// Advance both chip timers by us microseconds.
+// A timer whose counter is 0 is stopped and is skipped. When a running timer
+// expires it is reloaded (repeatedly, until its counter is positive again) and
+// its status flag is raised if enabled in the timer-control register (bit 2
+// for timer A, bit 3 for timer B). Timer A expiry also runs TimerA().
+// Returns 1 if either timer expired during this call, 0 otherwise.
+uint8_t OPNATimerCount(OPNA *opna, int32_t us)
+{
+    uint8_t fired = 0;
+
+    if (opna->timera_count != 0) {
+        opna->timera_count -= us << 16;
+        if (opna->timera_count <= 0) {
+            fired = 1;
+            TimerA(opna);
+
+            do {
+                opna->timera_count += opna->timera;
+            } while (opna->timera_count <= 0);
+
+            if (opna->regtc & 4)
+                opna->status |= 1;
+        }
+    }
+
+    if (opna->timerb_count != 0) {
+        opna->timerb_count -= us << 12;
+        if (opna->timerb_count <= 0) {
+            fired = 1;
+
+            do {
+                opna->timerb_count += opna->timerb;
+            } while (opna->timerb_count <= 0);
+
+            if (opna->regtc & 8)
+                opna->status |= 2;
+        }
+    }
+
+    return fired;
+}
+
+// Rhythm source samples. pcm_s8 (*not* u8!), and found in rhythmdata.h,
+// which is included in rhythmdata.c in order to keep the size of the
+// object file that you get from compiling this file at a reasonable size,
+// for debugging/testing/sanity purposes.
+//
+// Pointers to the six rhythm sample blobs (defined in another file).
+extern const unsigned char* rhythmdata[6];
+// Total size in bytes of each blob above, in the same order. OPNAInit()
+// subtracts 44 bytes from each before using it.
+static const unsigned int rhythmdatalen[6] = {
+    9013, 10674, 66610, 7259, 18562, 3042 
+};
+
+// ---------------------------------------------------------------------------
+// Main chip init routine.
+// c is the chip clock, which should never be set to anything other than 8MHz.
+// r is the chip samplerate, set to 44100 typically.
+// ipflag - if true, ignore the value of r, clock the "DAC" at the OPNA-internal
+// samplerate of 55466Hz, then downsample to whatever the actual value of r is.
+//
+// Initialise the whole chip.
+//   c      - chip clock; halved before being stored in opna->clock.
+//   r      - requested output samplerate.
+//   ipflag - non-zero to enable interpolation (see OPNASetRate()).
+// Returns true on success, false if OPNASetRate() rejects the rate.
+uint8_t OPNAInit(OPNA *opna, uint c, uint r, uint8_t ipflag)
+{
+    int i;
+
+    // Defaults that stay in effect until OPNASetRate()/OPNAReset() below.
+    opna->devmask = 0x7;
+    opna->prescale = 0;
+    opna->rate = 44100;
+    opna->mixl = 0;
+    opna->mixdelta = 16383;
+    opna->interpolation = false;
+
+    MakeTable();
+    for (i=0; i<6; i++) {
+        Ch4Init(&opna->ch[i]);
+        opna->rhythm[i].volume = 0;
+    }
+    opna->rhythmtvol = 0;
+    opna->csmch = &opna->ch[2];
+
+    // Attach the built-in rhythm samples. The first 44 bytes of each blob are
+    // skipped (presumably a WAV-style header - TODO confirm), and pos starts
+    // equal to size. The sample, pos and size fields are written only here:
+    // the earlier zero/~0 initialisation was always overwritten before use.
+    for (i=0; i<6; i++)
+    {
+        uint32_t fsize = (rhythmdatalen[i] - 44) / 2;
+
+        opna->rhythm[i].sample = (int8_t*)(rhythmdata[i] + 44);
+        opna->rhythm[i].rate = 44100;
+        opna->rhythm[i].step = opna->rhythm[i].rate * 1024 / opna->rate;
+        opna->rhythm[i].pos = opna->rhythm[i].size = fsize * 1024;
+    }
+
+    c /= 2;
+    opna->clock = c;
+    if (!OPNASetRate(opna, r, ipflag))
+        return false;
+    RebuildTimeTable(opna);
+    OPNAReset(opna);
+    PSGInit(&opna->psg);
+
+    SetVolumeFM(opna, 0);
+    SetVolumePSG(&opna->psg, 0);
+    OPNASetChannelMask(opna, ~0);
+    // Reset the volume of every rhythm channel; the loop previously passed a
+    // constant 0 and so only ever touched the first one.
+    // NOTE(review): assumes SetVolumeRhythm()'s second argument is the rhythm
+    // channel index - its definition is not visible here.
+    for (i=0; i<6; i++)
+        SetVolumeRhythm(opna, i, 0);
+    return true;
+}
+
+// ---------------------------------------------------------------------------
+// Reset chip. Your standard routine, basically zeros everything in sight.
+//
+// Reset the chip: prescaler 0, both timers stopped, PSG reset, the FM and
+// rhythm register ranges written with 0, every channel panned to 3 and every
+// operator reset.
+void OPNAReset(OPNA *opna)
+{
+    int i, j;
+
+    opna->status = 0;
+    SetPrescaler(opna, 0);
+    opna->timera_count = 0;
+    opna->timerb_count = 0;
+    PSGReset(&opna->psg);
+    opna->reg29 = 0x1f;
+    opna->rhythmkey = 0;
+    // Clear the registers through the normal write path so that all derived
+    // state is updated as well.
+    for (i=0x20; i<0x28; i++) OPNASetReg(opna, i, 0);
+    for (i=0x30; i<0xc0; i++) OPNASetReg(opna, i, 0);
+    for (i=0x130; i<0x1c0; i++) OPNASetReg(opna, i, 0);
+    for (i=0x100; i<0x110; i++) OPNASetReg(opna, i, 0);
+    for (i=0x10; i<0x20; i++) OPNASetReg(opna, i, 0);
+    for (i=0; i<6; i++)
+    {
+        opna->pan[i] = 3;
+        for(j=0; j<4; j++)
+            OperatorReset(&opna->ch[i].op[j]);
+    }
+
+    opna->statusnext = 0;
+    opna->lfocount = 0;
+    // Cleared again after the register writes above.
+    opna->status = 0;
+}
+
+// ---------------------------------------------------------------------------
+// Change OPNA "DAC" samplerate.
+// r and ipflag are as in OPNAInit(), above.
+//
+// Change the output samplerate.
+//   r      - new samplerate; must be non-zero.
+//   ipflag - non-zero to enable interpolation.
+// Rebuilds the rate-dependent tables, reloads the LFO increment from reg22,
+// flags every operator for re-preparation and rescales the rhythm step sizes.
+// Returns true on success, false (leaving the chip state untouched) when r is
+// 0, which would otherwise cause a division by zero here and in
+// SetPrescaler().
+uint8_t OPNASetRate(OPNA *opna, uint r, uint8_t ipflag)
+{
+    int i, j;
+
+    if (r == 0)
+        return false;
+
+    opna->interpolation = ipflag;
+    opna->psgrate = r;
+    RebuildTimeTable(opna);
+    // Bit 3 of reg22 enables the LFO; the low three bits select its speed.
+    opna->lfodcount = opna->reg22 & 0x08 ? lfotab[opna->reg22 & 7] : 0;
+
+    for (i=0; i<6; i++) {
+        for (j=0; j<4; j++)
+            opna->ch[i].op[j].paramchanged = true;
+    }
+    for (i=0; i<6; i++) {
+        opna->rhythm[i].step = opna->rhythm[i].rate * 1024 / r;
+    }
+    return true;
+}
+
+// ---------------------------------------------------------------------------
+// Set OPNA channel mask. The 6 LSBs of mask are 0 to disable that FM channel,
+// and 1 to enable it. The next 3 LSBs are passed to PSGSetChannelMask() to,
+// well, set the PSG channel mask (which behaves the same way: 0 disables
+// a given channel and 1 enables it).
+//
+// Apply a channel enable mask. Bits 0..5 control the six FM channels (a clear
+// bit mutes all four operators of that channel); the remaining bits, shifted
+// down by 6, are passed on to PSGSetChannelMask().
+void OPNASetChannelMask(OPNA *opna, uint mask)
+{
+    int ch, slot;
+
+    for (ch = 0; ch < 6; ch++) {
+        const int muted = !(mask & (1 << ch));
+
+        for (slot = 0; slot < 4; slot++) {
+            opna->ch[ch].op[slot].mute = muted;
+            opna->ch[ch].op[slot].paramchanged = true;
+        }
+    }
+    PSGSetChannelMask(&opna->psg, (mask >> 6));
+}
+
+// ---------------------------------------------------------------------------
+// Main OPNA register-set routine. Really long and boring switch-case.
+// Basically taken directly from the manual - the only parts of the spec
+// that were even the least bit tricky to implement were the f-number tables,
+// everything else is basically obvious.
+//
+// opna: chip state to update.
+// addr: register address. Addresses with bit 8 set (0x1xx) address FM
+//       channels 3-5 instead of 0-2 for the per-channel registers below.
+// data: value to write.
+//
+// Addresses that match no case and whose low two bits are 3 are ignored.
+//
+void OPNASetReg(OPNA *opna, uint addr, uint data)
+{
+    uint j, _dp = 0;
+    // Channel index within the bank, taken from the low two address bits.
+    int c = addr & 3;
+    switch (addr)
+    {
+        // Scratch variables shared by the cases below (declared before the
+        // first label, so they are never initialised here).
+        uint modified;
+        uint tmp;
+
+    // Timer -----------------------------------------------------------------
+        case 0x24: case 0x25:
+            // Timer A: 0x24 holds the upper 8 bits, 0x25 the lower 2 bits.
+            opna->regta[addr & 1] = (uint8_t)data;
+            tmp = (opna->regta[0] << 2) + (opna->regta[1] & 3);
+            opna->timera = (1024-tmp) * opna->timer_step;
+            break;
+
+        case 0x26:
+            opna->timerb = (256-data) * opna->timer_step;
+            break;
+
+        case 0x27:
+            // tmp = bits that changed compared to the previous write.
+            tmp = opna->regtc ^ data;
+            opna->regtc = (uint8_t)data;
+            // Bits 4/5 clear status bits 0/1 respectively.
+            if (data & 0x10)
+                opna->status &= ~1;
+            if (data & 0x20)
+                opna->status &= ~2;
+            // A toggled bit 0/1 reloads (bit now set) or zeroes (bit now
+            // clear) the matching timer counter.
+            if (tmp & 0x01)
+                opna->timera_count = (data & 1) ? opna->timera : 0;
+            if (tmp & 0x02)
+                opna->timerb_count = (data & 2) ? opna->timerb : 0;
+            break;
+
+    // Misc ------------------------------------------------------------------
+    case 0x28:      // Key On/Off
+        // Low two bits select the channel (the value 3 is ignored), bit 2
+        // adds 3 to it; bits 4-7 are the key state of operators 0-3.
+        if ((data & 3) < 3)
+        {
+            uint32_t key = (data >> 4);
+            c = (data & 3) + (data & 4 ? 3 : 0);
+            if (key & 0x1) KeyOn(&opna->ch[c].op[0]); else KeyOff(&opna->ch[c].op[0]);
+            if (key & 0x2) KeyOn(&opna->ch[c].op[1]); else KeyOff(&opna->ch[c].op[1]);
+            if (key & 0x4) KeyOn(&opna->ch[c].op[2]); else KeyOff(&opna->ch[c].op[2]);
+            if (key & 0x8) KeyOn(&opna->ch[c].op[3]); else KeyOff(&opna->ch[c].op[3]);
+        }
+        break;
+
+    // Status Mask -----------------------------------------------------------
+    case 0x29:
+        // Stored verbatim; FMMix() tests bit 7 to decide whether channels
+        // 3-5 are prepared and mixed.
+        opna->reg29 = data;
+        break;
+
+    // Prescaler -------------------------------------------------------------
+    case 0x2d: case 0x2e: case 0x2f:
+        // The written data is ignored; the address alone selects 0, 1 or 2.
+        SetPrescaler(opna, (addr-0x2d));
+        break;
+
+    // F-Number --------------------------------------------------------------
+    case 0x1a0: case 0x1a1: case 0x1a2:
+        c += 3;
+        // fall through
+    case 0xa0:  case 0xa1: case 0xa2:
+        // Combine the low byte with the high byte latched by 0xa4-0xa6.
+        opna->fnum[c] = data + opna->fnum2[c] * 0x100;
+        // Bits 0-10 shifted left by bits 11-13.
+        _dp = (opna->fnum[c] & 2047) << ((opna->fnum[c] >> 11) & 7);
+        for(j=0; j<4; j++) {
+            opna->ch[c].op[j].dp = _dp;
+            opna->ch[c].op[j].bn = notetab[(opna->fnum[c] >> 7) & 127];
+            opna->ch[c].op[j].paramchanged = true;
+        }
+        break;
+
+    case 0x1a4: case 0x1a5: case 0x1a6:
+        c += 3;
+        // fall through
+    case 0xa4 : case 0xa5: case 0xa6:
+        // Only latched here; it is applied by the next 0xa0-0xa2 write.
+        opna->fnum2[c] = (uint8_t)data;
+        break;
+
+    case 0xa8:  case 0xa9: case 0xaa:
+        // Second set of F-numbers; FMMix() applies them to the individual
+        // operators of csmch when regtc has bit 6 or 7 set. Their high bytes
+        // live in fnum2[6..8].
+        opna->fnum3[c] = data + opna->fnum2[c+6] * 0x100;
+        break;
+
+    case 0xac : case 0xad: case 0xae:
+        opna->fnum2[c+6] = (uint8_t)data;
+        break;
+
+    // Algorithm -------------------------------------------------------------
+    case 0x1b0: case 0x1b1:  case 0x1b2:
+        c += 3;
+        // fall through
+    case 0xb0:  case 0xb1:  case 0xb2:
+        // Bits 3-5: feedback (looked up in fbtab), bits 0-2: algorithm.
+        opna->ch[c].fb = fbtab[((data >> 3) & 7)];
+        SetAlgorithm(&opna->ch[c], data & 7);
+        break;
+
+    case 0x1b4: case 0x1b5: case 0x1b6:
+        c += 3;
+        // fall through
+    case 0xb4: case 0xb5: case 0xb6:
+        // Bits 6-7: pan. The whole byte is also stored in each operator's ms.
+        opna->pan[c] = (data >> 6) & 3;
+        for(j=0; j<4; j++) {
+            opna->ch[c].op[j].ms = data;
+            opna->ch[c].op[j].paramchanged = true;
+        }
+        break;
+
+    // Rhythm ----------------------------------------------------------------
+    case 0x10:          // DM/KEYON
+        if (!(data & 0x80))  // KEY ON
+        {
+            // Bits 0-5 key on rhythm sources 0-5 and rewind each one.
+            opna->rhythmkey |= data & 0x3f;
+            if (data & 0x01) opna->rhythm[0].pos = 0;
+            if (data & 0x02) opna->rhythm[1].pos = 0;
+            if (data & 0x04) opna->rhythm[2].pos = 0;
+            if (data & 0x08) opna->rhythm[3].pos = 0;
+            if (data & 0x10) opna->rhythm[4].pos = 0;
+            if (data & 0x20) opna->rhythm[5].pos = 0;
+        }
+        else
+        {                   // DUMP
+            // Bit 7 set: the set bits of data are cleared from rhythmkey.
+            opna->rhythmkey &= ~data;
+        }
+        break;
+
+    case 0x11:
+        // Stored inverted, low 6 bits only.
+        opna->rhythmtl = ~data & 63;
+        break;
+
+    case 0x18:      // Bass Drum
+    case 0x19:      // Snare Drum
+    case 0x1a:      // Top Cymbal
+    case 0x1b:      // Hihat
+    case 0x1c:      // Tom-tom
+    case 0x1d:      // Rim shot
+        // addr & 7 gives the rhythm index 0-5. Bits 6-7: pan; low 5 bits:
+        // level, stored inverted.
+        opna->rhythm[addr & 7].pan   = (data >> 6) & 3;
+        opna->rhythm[addr & 7].level = ~data & 31;
+        break;
+
+    // LFO -------------------------------------------------------------------
+    case 0x22:
+        modified = opna->reg22 ^ data;
+        opna->reg22 = data;
+        // Toggling bit 3 restarts the LFO counter.
+        if (modified & 0x8)
+            opna->lfocount = 0;
+        // LFO step: from lfotab (indexed by bits 0-2) while bit 3 is set,
+        // otherwise 0.
+        opna->lfodcount = opna->reg22 & 8 ? lfotab[opna->reg22 & 7] : 0;
+        break;
+
+    // PSG -------------------------------------------------------------------
+    case  0: case  1: case  2: case  3: case  4: case  5: case  6: case  7:
+    case  8: case  9: case 10: case 11: case 12: case 13: case 14: case 15:
+        PSGSetReg(&opna->psg, addr, data);
+        break;
+
+    // ADSR ------------------------------------------------------------------
+    default:
+        // Per-operator registers. Addresses whose low two bits are 3 are
+        // ignored.
+        if (c < 3)
+        {
+            if (addr & 0x100)
+                c += 3;
+            {
+                // Address bits 2-3 select the operator; values 1 and 2 map
+                // to operator indices 2 and 1 respectively.
+                uint8_t slottable[4] = { 0, 2, 1, 3 };
+                uint32_t slot = slottable[(addr >> 2) & 3];
+                FMOperator* op = &opna->ch[c].op[slot];
+        
+                // Address bits 4-7 select the parameter group; values with
+                // no matching case below are ignored.
+                switch ((addr >> 4) & 15)
+                {
+                case 3: // 30-3E DT/MULTI
+                    op->detune = (((data >> 4) & 0x07) * 0x20);
+                    op->multiple = (data & 0x0f);
+                    op->paramchanged = 1;
+                    break;
+        
+                case 4: // 40-4E TL
+                    // tll always receives the written value; tl is left
+                    // alone while regtc bit 7 is set and this channel is
+                    // csmch.
+                    if(!((opna->regtc & 0x80) && (opna->csmch == &opna->ch[c]))) {
+                        op->tl = (data & 0x7f);
+                        op->paramchanged = 1;
+                    }
+                    op->tll = (data & 0x7f);
+                    break;
+        
+                case 5: // 50-5E KS/AR
+                    op->ks = ((data >> 6) & 3);
+                    op->ar = ((data & 0x1f) * 2);
+                    op->paramchanged = 1;
+                    break;
+        
+                case 6: // 60-6E DR/AMON
+                    op->dr = ((data & 0x1f) * 2);
+                    op->amon = ((data & 0x80) != 0);
+                    op->paramchanged = 1;
+                    break;
+        
+                case 7: // 70-7E SR
+                    op->sr = ((data & 0x1f) * 2);
+                    op->paramchanged = 1;
+                    break;
+        
+                case 8: // 80-8E SL/RR
+                    op->sl = (((data >> 4) & 15) * 4); 
+                    op->rr = ((data & 0x0f) * 4 + 2);
+                    op->paramchanged = 1;
+                    break;
+        
+                case 9: // 90-9E SSG-EC
+                    // Note: paramchanged is not set for this register.
+                    op->ssgtype = (data & 0x0f);
+                    break;
+                }
+            }
+        }
+        break;
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Read OPNA register. Addresses below 0x10 are forwarded to PSGGetReg(),
+// address 0xff reads back as 1, and every other address reads back as 0.
+// Pointless in practice: the only register anyone seems to be interested in
+// reading is SSG register 7, which is discussed in psg.c.
+//
+uint OPNAGetReg(OPNA *opna, uint addr)
+{
+    uint value = 0;
+
+    if (addr < 0x10) {
+        value = PSGGetReg(&opna->psg, addr);
+    } else if (addr == 0xff) {
+        value = 1;
+    }
+    return value;
+}
+
+// ---------------------------------------------------------------------------
+
+// Sum the Ch4CalcL() output of every channel whose bit is set in activech.
+// Channel n is selected by bit 2*n (0x001, 0x004, ... 0x400). Channel 0
+// overwrites *dest; channels 1-5 add to it.
+static inline void MixSubSL(Channel4 ch[6], int activech, int32_t *dest)
+{
+    int n;
+    int bit = 0x004;
+
+    if (activech & 0x001) {
+        *dest = Ch4CalcL(&ch[0]);
+    }
+    for (n = 1; n < 6; n++) {
+        if (activech & bit) {
+            *dest += Ch4CalcL(&ch[n]);
+        }
+        bit <<= 2;
+    }
+}
+
+// Same as MixSubSL() but using Ch4Calc(): channel n is selected by bit 2*n
+// of activech; channel 0 overwrites *dest, channels 1-5 add to it.
+static inline void MixSubS(Channel4 ch[6], int activech, int32_t *dest)
+{
+    int n;
+    int bit = 0x004;
+
+    if (activech & 0x001) {
+        *dest = Ch4Calc(&ch[0]);
+    }
+    for (n = 1; n < 6; n++) {
+        if (activech & bit) {
+            *dest += Ch4Calc(&ch[n]);
+        }
+        bit <<= 2;
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Mix FM channels and output. Mix6 runs at user-specified samplerate,
+// Mix6I runs at the chip samplerate of 55466Hz and then downsamples
+// to the user-specified samplerate. It is an open problem as to determining
+// if one of these sounds better than the other.
+//
+// Scale a mixed FM accumulator value to an output sample: (s) >> 2 is passed
+// through Limit() with the bounds 0xffff and -0xffff, multiplied by
+// opna->fmvolume and shifted right by 14. The expansion refers to a variable
+// named `opna`, so the macro is only usable where one is in scope.
+#define IStoSample(s)   ((Limit((s) >> 2, 0xffff, -0xffff) * opna->fmvolume) >> 14)
+// Alternative without the Limit() step, currently disabled:
+//#define IStoSample(s) ((((s) >> 3) * fmvolume) >> 14)
+
+// Non-interpolating mixer: generates exactly one FM sample per output sample
+// and adds it to buffer[0..nsamples-1]. activech is the channel bit set
+// built by FMMix(); if any of its 0xaaa bits is set the LFO is stepped and
+// the Ch4CalcL() path is used, otherwise the Ch4Calc() path.
+static void Mix6(OPNA *opna, Sample* buffer, uint32_t nsamples, int activech)
+{
+    uint32_t n;
+
+    for (n = 0; n < nsamples; n++)
+    {
+        int32_t acc = 0;
+
+        if (activech & 0xaaa) {
+            LFO(opna);
+            MixSubSL(opna->ch, activech, &acc);
+        } else {
+            MixSubS(opna->ch, activech, &acc);
+        }
+        buffer[n] += IStoSample(acc);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Interpolating mixer; see the comment above Mix6().
+//
+// Adds nsamples samples to buffer. The resampling phase is carried between
+// calls in opna->mixdelta, and the partial sums in opna->mixl / opna->mixl1.
+// opna->mpratio is compared against 16384 to pick one of two strategies:
+//  - mpratio < 16384: several FM samples are generated per output sample and
+//    accumulated into mixl, each weighted by the part of it (d) that falls
+//    inside the current output sample;
+//  - otherwise: a new FM sample is generated only when the phase goes
+//    negative, and the output is a linear blend of the last two FM samples.
+// activech has the same meaning as in Mix6().
+//
+static void Mix6I(OPNA *opna, Sample* buffer, uint32_t nsamples, int activech)
+{
+    // Mix
+    int32_t ibuf;
+
+    int32_t delta = opna->mixdelta;
+    Sample* limit = buffer + nsamples;
+    Sample *dest;
+    if (opna->mpratio < 16384)
+    {
+        for (dest = buffer; dest < limit; dest+=1)
+        {
+            // Initialised because both are read after the inner loop, which
+            // does not run at all if delta is not positive on entry. In that
+            // case nothing is carried over into the next output sample.
+            int32_t l = 0, d = 0;
+            while (delta > 0)
+            {
+                ibuf = 0;
+                if (activech & 0xaaa)
+                    LFO(opna), MixSubSL(opna->ch, activech, &ibuf);
+                else
+                    MixSubS(opna->ch, activech, &ibuf);
+
+                l = IStoSample(ibuf);
+                d = Min(opna->mpratio, delta);
+                opna->mixl += l * d;
+                delta -= opna->mpratio;
+            }
+            dest[0] += (opna->mixl >> 14);
+            // Remainder of the last FM sample belongs to the next output.
+            opna->mixl = l * (16384-d);
+            delta += 16384;
+        }
+    } else {
+        // Phase decrement per output sample.
+        int impr = 16384 * 16384 / opna->mpratio;
+        for (dest = buffer; dest < limit; dest+=1)
+        {
+            if (delta < 0)
+            {
+                delta += 16384;
+                // Shift the newest sample into the "previous" slot.
+                opna->mixl = opna->mixl1;
+
+                ibuf = 0;
+                if (activech & 0xaaa)
+                    LFO(opna), MixSubSL(opna->ch, activech, &ibuf);
+                else
+                    MixSubS(opna->ch, activech, &ibuf);
+
+                opna->mixl1 = IStoSample(ibuf);
+            }
+            // Linear interpolation between mixl and mixl1, weight delta/16384.
+            int32_t l = (delta * opna->mixl + (16384 - delta) * opna->mixl1) / 16384;
+            dest[0] += l;
+            delta -= impr;
+        }
+    }
+    opna->mixdelta = delta;
+}
+
+// ---------------------------------------------------------------------------
+// Main FM output routine. Refreshes the F-number of the csmch channel,
+// prepares the FM channels, and hands the set of active ones to Mix6() or
+// Mix6I(), which add the FM output to buffer. Does nothing when fmvolume is
+// not positive.
+// buffer should be a pointer to a buffer of type Sample (int32_t in this
+// implementation, though another used float and in principle int16_t *should*
+// be sufficient), and be of size at least equal to nsamples.
+//
+static void FMMix(OPNA *opna, Sample* buffer, uint32_t nsamples)
+{
+    int act = 0;
+    int nch, k;
+    uint slot;
+
+    if (!(opna->fmvolume > 0))
+        return;
+
+    // Set F-Number
+    if (opna->regtc & 0xc0) {
+        // Each operator of csmch gets its own F-number.
+        SetFNum(&opna->csmch->op[0], opna->fnum3[1]);
+        SetFNum(&opna->csmch->op[1], opna->fnum3[2]);
+        SetFNum(&opna->csmch->op[2], opna->fnum3[0]);
+        SetFNum(&opna->csmch->op[3], opna->fnum[2]);
+    } else {
+        // All four operators share the channel's own F-number.
+        uint fn = opna->fnum[opna->csmch - opna->ch];
+        uint _dp = (fn & 2047) << ((fn >> 11) & 7);
+
+        for (slot = 0; slot < 4; slot++) {
+            opna->csmch->op[slot].dp = _dp;
+            opna->csmch->op[slot].bn = notetab[(fn >> 7) & 127];
+            opna->csmch->op[slot].paramchanged = true;
+        }
+    }
+
+    // Channel k contributes its Ch4Prepare() result at bits 2k and 2k+1.
+    // Channels 3-5 are only prepared while bit 7 of reg29 is set.
+    nch = (opna->reg29 & 0x80) ? 6 : 3;
+    for (k = 0; k < nch; k++)
+        act |= Ch4Prepare(&opna->ch[k]) << (2 * k);
+
+    // Without bit 3 of reg22 the odd bits (the ones that make the mixers
+    // step the LFO) are dropped.
+    if (!(opna->reg22 & 0x08))
+        act &= 0x555;
+
+    if (!(act & 0x555)) {
+        // Nothing to mix: reset the interpolator state.
+        opna->mixl = 0;
+        opna->mixdelta = 16383;
+        return;
+    }
+
+    if (opna->interpolation)
+        Mix6I(opna, buffer, nsamples, act);
+    else
+        Mix6(opna, buffer, nsamples, act);
+}
+
+// ---------------------------------------------------------------------------
+// Mix Rhythm generator output. For every keyed-on rhythm source, reads its
+// PCM data at the current position, scales it by a volume looked up in tltab
+// and adds it to buffer until either count samples were written or the
+// source has been played to its end.
+// The same restrictions on buffer as in FMMix() above apply.
+//
+static void RhythmMix(OPNA *opna, Sample* buffer, uint32_t count)
+{
+    int idx;
+
+    // Nothing to do unless the rhythm volume is below 128, rhythm[0] has
+    // sample data and at least one source is keyed on.
+    if (!(opna->rhythmtvol < 128))
+        return;
+    if (!opna->rhythm[0].sample)
+        return;
+    if (!(opna->rhythmkey & 0x3f))
+        return;
+
+    for (idx = 0; idx < 6; idx++)
+    {
+        Rhythm *r = &opna->rhythm[idx];
+        uint32_t n;
+
+        if (!((opna->rhythmkey & (1 << idx)) && r->level >= 0))
+            continue;
+
+        {
+            // Total, per-source level and per-source volume are summed and
+            // limited to the bounds 95 / -31 before the table lookup.
+            int db = Limit(opna->rhythmtl+r->level+r->volume, 95, -31);
+            int vol = tltab[FM_TLPOS + db];
+
+            // pos advances by step per output sample; pos / 1024 is the
+            // index into the sample data.
+            for (n = 0; n < count && r->pos < r->size; n++)
+            {
+                int sample = ((r->sample[r->pos / 1024] << 8) * vol) >> 12;
+                r->pos += r->step;
+                buffer[n] += sample;
+            }
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Main OPNA output routine. Each bit of opna->devmask enables one generator,
+// which adds its output to buffer: bit 0 FM, bit 1 PSG, bit 2 rhythm.
+// See FMMix(), RhythmMix() above and PSGMix() in psg.c for details.
+//
+void OPNAMix(OPNA *opna, Sample* buffer, uint32_t nsamples)
+{
+    if (opna->devmask & 1) {
+        FMMix(opna, buffer, nsamples);
+    }
+    if (opna->devmask & 2) {
+        PSGMix(&opna->psg, buffer, nsamples);
+    }
+    if (opna->devmask & 4) {
+        RhythmMix(opna, buffer, nsamples);
+    }
+}
+
+// ---------------------------------------------------------------------------
+//  Table setup/generation routines.
+//  FIXME: unify cltab/tltab and then hardcode the result, it's tiny enough
+//         that we don't really need to bother with runtime init for it.
+//
+//  Fills tltab, cltab and pmtable once; later calls return immediately
+//  because tablemade is already set.
+//
+void MakeTable(void) {
+    int i, j;
+    if (tablemade)
+        return;
+
+    tablemade = true;
+
+    // tltab: 4096 * 2^(-16 * i / FM_TLENTS) - 1, stored at index FM_TLPOS + i
+    // so that negative i can be looked up as well.
+    for (i=-FM_TLPOS; i<FM_TLENTS-FM_TLPOS; i++)
+    {
+        tltab[FM_TLPOS + i] = (uint32_t)(4096.0f * expf((float)M_LN2*(i * -16.0f / FM_TLENTS)))-1;
+    }
+
+    // cltab: 255 * 2^(-i / 64), truncated to an integer.
+    for (i=0; i<512; i++)
+    {
+        int c = (int)(((1 << 8) - 1) * expf((float)M_LN2*(-i / 64.0f)));
+#if 1
+        // Limit precision: keep only the top bits below the highest set bit.
+        // c never exceeds 255 here, so none of the bits 12-16 tested below
+        // is ever set and this loop leaves c unchanged.
+        for (j=16; j>11; j--)
+        {
+            if ((1 << j) & c)
+            {
+                c &= ((1 << 11) - 1) << (j - 10);
+                break;
+            }
+        }
+#endif
+        cltab[i] = c;
+    }
+    //       3       6,      12      30       60       240      420     / 720
+    //  1.000963
+    //  lfofref[level * max * wave];
+    //  pre = lfofref[level][pms * wave >> 8];
+    //
+    // pmtable[i][j] = 65536 * (2^x - 1), where x runs linearly from -pms[i]
+    // (j = 0) to +pms[i] (j = FM_LFOENTS-1). The value is the relative
+    // frequency deviation in 16.16 fixed point, so that
+    // log2(1 + pmtable[i][j] / 65536) * 1200 is the deviation in cents.
+    // The "- 1" belongs outside expf(); inside it, the result would be
+    // 65536 * 2^x / e, which is not zero-centred.
+    for (i=0; i<8; i++)
+    {
+        float pmb = pms[i];
+        for (j=0; j<FM_LFOENTS; j++)
+        {
+            float x = pmb * (2*j - FM_LFOENTS+1) / (FM_LFOENTS-1);
+            pmtable[i][j] = (int)(0x10000 * (expf((float)M_LN2 * x) - 1));
+        }
+    }
+}
+