view fmgen/opna.c @ 1:83859b2e2bae

Add build instructions for a shared library.
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Tue, 21 May 2013 10:37:21 +0200
parents c55ea9478c80
children
line wrap: on
line source

// FIXME: move ugly-ass legalese somewhere where it won't be seen
// by anyone other than lawyers. (/dev/null would be ideal but sadly
// we live in an imperfect world).
/* Copyright (c) 2012/2013, Peter Barfuss
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met: 

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer. 
2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <stdint.h>
#include <math.h>
#include <unistd.h>
#include "op.h"
#include "psg.h"
#include "opna.h"
// Note-code table. SetFNum() indexes it with bits 7..13 of the combined
// block/F-number word ((f >> 7) & 127) and stores the result in the
// operator's bn field. Each row of 16 entries below yields four consecutive
// codes (row r produces 4r .. 4r+3), so results range over 0..31.
static const uint8_t notetab[128] =
{
     0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  3,  3,  3,  3,  3,  3,
     4,  4,  4,  4,  4,  4,  4,  5,  6,  7,  7,  7,  7,  7,  7,  7,
     8,  8,  8,  8,  8,  8,  8,  9, 10, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 13, 14, 15, 15, 15, 15, 15, 15, 15,
    16, 16, 16, 16, 16, 16, 16, 17, 18, 19, 19, 19, 19, 19, 19, 19,
    20, 20, 20, 20, 20, 20, 20, 21, 22, 23, 23, 23, 23, 23, 23, 23,
    24, 24, 24, 24, 24, 24, 24, 25, 26, 27, 27, 27, 27, 27, 27, 27,
    28, 28, 28, 28, 28, 28, 28, 29, 30, 31, 31, 31, 31, 31, 31, 31,
};

// Detune offsets. OperatorPrepare() adds dttab[op->detune + op->bn] to the
// operator's base phase increment (dp). The second 128 entries are the
// exact negation of the first 128. Within each half there are four groups
// of 32 entries, one per bn value.
// NOTE(review): this layout suggests op->detune is stored pre-multiplied by
// 32 (0, 32, ..., 224) - confirm against the register-write code.
static const int8_t dttab[256] =
{
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  2,  2,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,
      4,  6,  6,  6,  8,  8,  8, 10, 10, 12, 12, 14, 16, 16, 16, 16,
      2,  2,  2,  2,  4,  4,  4,  4,  4,  6,  6,  6,  8,  8,  8, 10,
     10, 12, 12, 14, 16, 16, 18, 20, 22, 24, 26, 28, 32, 32, 32, 32,
      4,  4,  4,  4,  4,  6,  6,  6,  8,  8,  8, 10, 10, 12, 12, 14,
     16, 16, 18, 20, 22, 24, 26, 28, 32, 34, 38, 40, 44, 44, 44, 44,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0, -2, -2, -2, -2, -2, -2, -2, -2, -4, -4, -4, -4,
     -4, -6, -6, -6, -8, -8, -8,-10,-10,-12,-12,-14,-16,-16,-16,-16,
     -2, -2, -2, -2, -4, -4, -4, -4, -4, -6, -6, -6, -8, -8, -8,-10,
    -10,-12,-12,-14,-16,-16,-18,-20,-22,-24,-26,-28,-32,-32,-32,-32,
     -4, -4, -4, -4, -4, -6, -6, -6, -8, -8, -8,-10,-10,-12,-12,-14,
    -16,-16,-18,-20,-22,-24,-26,-28,-32,-34,-38,-40,-44,-44,-44,-44,
};

// Linear gain per total-level (tl) step. The value halves every 8 entries
// (0xff, 0x80, 0x40, 0x20, ...).
//
// The table is indexed directly with op->tl by Ch4Calc()/Ch4CalcL(), and
// OperatorReset() parks tl at 127, so it has to cover indices 0..127.
// Only the first 64 gains are non-zero; the remaining 64 entries are
// implicitly zero-initialised, which matches the halving pattern (the next
// value would be below 1). Previously the array had just 64 entries and a
// tl of 64..127 read past its end.
static uint8_t gaintab[128] = {
    0xff, 0xea, 0xd7, 0xc5, 0xb5, 0xa6, 0x98, 0x8b, 0x80, 0x75, 0x6c, 0x63, 0x5a, 0x53, 0x4c, 0x46,
    0x40, 0x3b, 0x36, 0x31, 0x2d, 0x2a, 0x26, 0x23, 0x20, 0x1d, 0x1b, 0x19, 0x17, 0x15, 0x13, 0x12,
    0x10, 0x0f, 0x0e, 0x0c, 0x0b, 0x0a, 0x0a, 0x09, 0x08, 0x07, 0x07, 0x06, 0x06, 0x05, 0x05, 0x04,
    0x04, 0x04, 0x03, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01,
    /* entries 64..127 are zero */
};

// Half-period sine table: entry i is sinf(M_PI*(2*i+1)/1024.0f) scaled by 256
// and rounded, for i=0,...,511 (so the values run 0x1 .. 0x100 .. 0x1).
// Only the positive half of the period is stored; the Sine() macro supplies
// the sign for the second half.
// Possible future change: make the table twice as large (appending the
// negated first half to cover the rest of [0,2*M_PI]) and drop the sign hack
// in Sine(). The original author was unsure which variant would use fewer
// gates on an FPGA, and expected no meaningful speed difference on a CPU.
static uint16_t sinetable[512] = {
    0x1, 0x2, 0x4, 0x5, 0x7, 0x9, 0xa, 0xc, 0xd, 0xf, 0x10, 0x12, 0x14, 0x15, 0x17, 0x18, 
    0x1a, 0x1b, 0x1d, 0x1f, 0x20, 0x22, 0x23, 0x25, 0x26, 0x28, 0x29, 0x2b, 0x2d, 0x2e, 0x30, 0x31, 
    0x33, 0x34, 0x36, 0x37, 0x39, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42, 0x44, 0x45, 0x47, 0x48, 0x4a, 
    0x4b, 0x4d, 0x4e, 0x50, 0x51, 0x53, 0x54, 0x56, 0x57, 0x58, 0x5a, 0x5b, 0x5d, 0x5e, 0x60, 0x61, 
    0x63, 0x64, 0x66, 0x67, 0x68, 0x6a, 0x6b, 0x6d, 0x6e, 0x70, 0x71, 0x72, 0x74, 0x75, 0x77, 0x78, 
    0x79, 0x7b, 0x7c, 0x7d, 0x7f, 0x80, 0x82, 0x83, 0x84, 0x86, 0x87, 0x88, 0x8a, 0x8b, 0x8c, 0x8e, 
    0x8f, 0x90, 0x91, 0x93, 0x94, 0x95, 0x97, 0x98, 0x99, 0x9a, 0x9c, 0x9d, 0x9e, 0x9f, 0xa1, 0xa2, 
    0xa3, 0xa4, 0xa5, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 
    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xde, 0xdf, 0xe0, 0xe1, 0xe1, 
    0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe6, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xec, 
    0xed, 0xed, 0xee, 0xef, 0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3, 0xf3, 0xf4, 0xf4, 0xf5, 
    0xf5, 0xf6, 0xf6, 0xf7, 0xf7, 0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xf9, 0xfa, 0xfa, 0xfa, 0xfb, 0xfb, 
    0xfb, 0xfc, 0xfc, 0xfc, 0xfc, 0xfd, 0xfd, 0xfd, 0xfd, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff, 0xff, 
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 
    0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
    0xff, 0xff, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfd, 0xfd, 0xfd, 0xfd, 0xfc, 0xfc, 0xfc, 0xfc, 0xfb, 
    0xfb, 0xfb, 0xfa, 0xfa, 0xfa, 0xf9, 0xf9, 0xf9, 0xf8, 0xf8, 0xf7, 0xf7, 0xf7, 0xf6, 0xf6, 0xf5, 
    0xf5, 0xf4, 0xf4, 0xf3, 0xf3, 0xf2, 0xf2, 0xf1, 0xf1, 0xf0, 0xf0, 0xef, 0xef, 0xee, 0xed, 0xed, 
    0xec, 0xec, 0xeb, 0xea, 0xea, 0xe9, 0xe8, 0xe8, 0xe7, 0xe6, 0xe6, 0xe5, 0xe4, 0xe4, 0xe3, 0xe2, 
    0xe1, 0xe1, 0xe0, 0xdf, 0xde, 0xde, 0xdd, 0xdc, 0xdb, 0xda, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 
    0xd4, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0, 0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 
    0xc5, 0xc4, 0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8, 0xb7, 0xb6, 
    0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa5, 0xa4, 0xa3, 
    0xa2, 0xa1, 0x9f, 0x9e, 0x9d, 0x9c, 0x9a, 0x99, 0x98, 0x97, 0x95, 0x94, 0x93, 0x91, 0x90, 0x8f, 
    0x8e, 0x8c, 0x8b, 0x8a, 0x88, 0x87, 0x86, 0x84, 0x83, 0x82, 0x80, 0x7f, 0x7d, 0x7c, 0x7b, 0x79, 
    0x78, 0x77, 0x75, 0x74, 0x72, 0x71, 0x70, 0x6e, 0x6d, 0x6b, 0x6a, 0x68, 0x67, 0x66, 0x64, 0x63, 
    0x61, 0x60, 0x5e, 0x5d, 0x5b, 0x5a, 0x58, 0x57, 0x56, 0x54, 0x53, 0x51, 0x50, 0x4e, 0x4d, 0x4b, 
    0x4a, 0x48, 0x47, 0x45, 0x44, 0x42, 0x40, 0x3f, 0x3d, 0x3c, 0x3a, 0x39, 0x37, 0x36, 0x34, 0x33, 
    0x31, 0x30, 0x2e, 0x2d, 0x2b, 0x29, 0x28, 0x26, 0x25, 0x23, 0x22, 0x20, 0x1f, 0x1d, 0x1b, 0x1a, 
    0x18, 0x17, 0x15, 0x14, 0x12, 0x10, 0xf, 0xd, 0xc, 0xa, 0x9, 0x7, 0x6, 0x4, 0x2, 0x1, 
};

// File-scope lookup tables and LFO state shared by the operator code below.

// Not read anywhere in this part of the file; presumably records that
// MakeTable() has already run - TODO confirm.
static uint8_t tablemade = false;
// Conversion table read through LogToLin().
static uint8_t cltab[512];
// Not referenced in this part of the file.
static uint32_t tltab[FM_TLENTS];
// LFO counter increments; filled by SetPrescaler() and selected with
// (reg22 & 7) in OPNASetRate().
static uint32_t lfotab[8];
// Feedback shift amounts. CalcFB()/CalcFBL() treat a shift of 31 as
// "no self-modulation". Presumably the source of Channel4.fb - TODO confirm.
static const uint8_t fbtab[8] = { 31, 7, 6, 5, 4, 3, 2, 1 };

/* Amplitude/Phase modulation tables. */
// Phase-modulation depths; not referenced in this part of the file
// (presumably used when pmtable is built - TODO confirm).
static const float pms[8] = { 0, 1/720., 2/720., 3/720.,  4/720.,  6/720., 12/720.,  24/720. };    // OPNA
// Right-shift applied to aml for each operator ams setting (see Ch4CalcL()).
static const uint8_t amt[4] = { 29, 4, 2, 1 }; // OPNA
// One row per (ms & 7) value (selected in Ch4Prepare()); a row is indexed
// with pml in Ch4CalcL().
static int     pmtable[8][FM_LFOENTS];
// Current LFO outputs, written by LFO(): aml feeds the tremolo term in
// Ch4CalcL(), pml selects the pmtable column.
uint8_t aml, pml;
// Phase-modulation value for the channel being rendered; written by
// Ch4CalcL() and read by PGCalcL().
int     pmv;

// ---------------------------------------------------------------------------
// Step the chip LFO once and refresh the shared modulation outputs.
// The 8-bit position is sampled from lfocount *before* the counter is
// advanced. From it:
//   pml - triangle starting at 0x80, rising to 0xff, falling to 0x01,
//         then rising again from 0x00 (three segments).
//   aml - triangle rising 0x00..0xfe over the first half of the cycle and
//         falling 0xff..0x01 over the second half.
static inline void LFO(OPNA *opna)
{
    const uint8_t pos = (opna->lfocount >> FM_LFOCBITS) & 0xff;

    opna->lfocount += opna->lfodcount;

    if (pos >= 0xc0) {
        pml = (pos - 0xc0) * 2;
    } else if (pos >= 0x40) {
        pml = 0x7f - (pos - 0x40) * 2 + 0x80;
    } else {
        pml = pos * 2 + 0x80;
    }

    if (pos >= 0x80) {
        aml = ~(pos << 1);
    } else {
        aml = (pos << 1);
    }
}

// ---------------------------------------------------------------------------
// Rebuild the tables that depend on the chip-clock / output-samplerate
// ratio: the phase-generator scale factor rr and the 64-entry envelope
// rate table. According to the original author this is effectively the
// only place where the chip clock influences the synthesis, and only
// indirectly through `ratio`.
// The work is skipped when `ratio` equals the value used last time.
//
static uint32_t currentratio = ~0;
static float rr;
static uint32_t ratetable[64];
static void MakeTimeTable(uint32_t ratio)
{
    int idx;

    if (ratio == currentratio)
        return;
    currentratio = ratio;

    // PG part
    rr = (float)ratio / (1 << (2 + FM_RATIOBITS - FM_PGBITS));

    // EG part: entry idx corresponds to h = idx / 4, l = idx % 4.
    for (idx = 4; idx < 64; idx++)
    {
        int h = idx >> 2;
        int l = idx & 3;
        int m = (h == 15) ? 8 : l + 4;

        ratetable[idx] =
            ((ratio << (FM_EGBITS - 3 - FM_RATIOBITS)) << Min(h, 11)) * m;
    }

    // Rates 0..3 never advance the envelope.
    for (idx = 0; idx < 4; idx++)
        ratetable[idx] = 0;

    // Rates 5 and 7 duplicate their even neighbours.
    ratetable[5] = ratetable[4];
    ratetable[7] = ratetable[6];
}

// Load the envelope stepping parameters of `op` for rate index `r`
// (an index into ratetable[], i.e. 0..63).
static void SetEGRate(FMOperator *op, uint r)
{
    // Shift used by the attack curve; Limit() presumably clamps it to 1..4.
    op->egtransa = Limit(15 - (r>>2), 4, 1);
    // Level increment used outside the attack phase.
    op->egtransd = 16 >> op->egtransa;
    // Amount subtracted from egstep per output sample.
    op->egstepd = ratetable[r];
}

// Full operator initialisation: clears the register-derived parameters that
// OperatorReset() leaves alone, then delegates to OperatorReset() for the
// runtime state.
//
void OperatorInit(FMOperator *op)
{
    // LFO sensitivity
    op->ms = 0;

    // PG parameters
    op->detune = 0;
    op->multiple = 0;

    // EG parameters and flags
    op->keyon = false;
    op->mute = false;
    op->ams = 0;
    op->ksr = 0;
    op->rr = 0;
    op->sr = 0;
    op->dr = 0;
    op->ar = 0;

    OperatorReset(op);
}

// Reset an operator's runtime state: envelope silent and in the `off`
// state, phase counter and stored samples cleared. paramchanged is raised
// so that the next OperatorPrepare() recomputes the EG/PG parameters.
//
void OperatorReset(FMOperator *op)
{
    // OP part: forget the previous output samples
    op->out2 = 0;
    op->out = 0;

    // PG part
    op->pgcount = 0;

    // EG part
    op->tll = 127;
    op->tl = op->tll;
    op->eglevel = 0xff;
    op->eglvnext = 0x100;
    op->egstep = 0;
    op->phase = off;
    SetEGRate(op, 0);

    op->paramchanged = true;
}

// Recompute the derived EG/PG/LFO parameters of an operator, but only when
// paramchanged is set (the flag is cleared here).
// PG: pgdcount, the phase-counter increment, is derived from dp, the detune
//     offset for the current bn, and the frequency multiple (see pages
//     24-26 of the OPNA manual).
// EG: the key-scaled rate is refreshed for whichever ADSR state the
//     operator is currently in.
//
void OperatorPrepare(FMOperator *op)
{
    if (!op->paramchanged)
        return;
    op->paramchanged = false;

    // PG part. The multiplier is expressed in halves: a `multiple` of 0
    // contributes 1, any other value contributes 2*multiple.
    const uint8_t mul2 = ((op->multiple) ? 2*op->multiple : 1);
    op->pgdcount = (op->dp + dttab[op->detune + op->bn]) * (uint32_t)(mul2 * rr);
    op->pgdcountl = op->pgdcount >> 11;

    // EG part: key-scale rate from the note code
    op->ksr = op->bn >> (3-op->ks);

    switch (op->phase)
    {
    case attack:
        SetEGRate(op, op->ar ? Min(63, op->ar+op->ksr) : 0);
        break;
    case decay:
        op->eglvnext = op->sl * 8;
        SetEGRate(op, op->dr ? Min(63, op->dr+op->ksr) : 0);
        break;
    case sustain:
        SetEGRate(op, op->sr ? Min(63, op->sr+op->ksr) : 0);
        break;
    case release:
        SetEGRate(op, Min(63, op->rr+op->ksr));
        break;
    default:
        // `next` and `off` need no rate update
        break;
    }

    // LFO: tremolo sensitivity applies only while AM is enabled
    op->ams = (op->amon ? (op->ms >> 4) & 3 : 0);
}

// ADSR state transition for one operator's envelope generator.
// FIXME: Rename. "Phase" here refers to the ADSR state, not to the
// PG/sine-table phase.
//
// The switch relies on deliberate fallthrough: when the requested state
// cannot be entered, control drops into the next state's case.
//
static void ShiftPhase(FMOperator *op, EGPhase nextphase)
{
    switch (nextphase)
    {
    case attack:        // Attack Phase
        op->tl = op->tll;   // the latched total level takes effect on attack
        if ((op->ar+op->ksr) < 62) {
            SetEGRate(op, op->ar ? Min(63, op->ar+op->ksr) : 0);
            op->phase = attack;
            break;
        }
        // fallthrough: an effective attack rate of 62 or more skips the
        // attack phase entirely
    case decay:         // Decay Phase
        if (op->sl) {
            op->eglevel = 0;
            op->eglvnext = op->sl*8;    // decay runs until the sustain level
            SetEGRate(op, op->dr ? Min(63, op->dr+op->ksr) : 0);
            op->phase = decay;
            break;
        }
        // fallthrough: a sustain level of 0 means there is nothing to decay to
    case sustain:       // Sustain Phase
        op->eglevel = op->sl*8;
        op->eglvnext = 0x100;
        SetEGRate(op, op->sr ? Min(63, op->sr+op->ksr) : 0);
        op->phase = sustain;
        break;

    case release:       // Release Phase
        if (op->phase == attack || (op->eglevel < 0x100/* && phase != off*/)) {
            op->eglvnext = 0x100;
            SetEGRate(op, Min(63, op->rr+op->ksr));
            op->phase = release;
            break;
        }
        // fallthrough: the level is already at or above 0x100, so switch off
    case off:           // off
    default:
        op->eglevel = 0xff;
        op->eglvnext = 0x100;
        SetEGRate(op, 0);
        op->phase = off;
        break;
    }
}

// Apply a combined block/F-number word to an operator.
// Bits 0..10 of `f` are the F-number and bits 11..13 the block; the base
// phase increment dp is the F-number shifted left by the block. The note
// code bn is looked up from bits 7..13. Raises paramchanged so that
// OperatorPrepare() recomputes the derived values.
static inline void SetFNum(FMOperator *op, uint f)
{
    const uint block = (f >> 11) & 7;
    const uint fnum = f & 2047;

    op->bn = notetab[(f >> 7) & 127];
    op->dp = fnum << block;
    op->paramchanged = true;
}

// Clock the envelope generator of one operator.
// The step counter is topped up first. In the attack state the level then
// decreases and, once it reaches 0 or below, the envelope moves on to decay.
// In every other state the level increases by egtransd and, once it reaches
// eglvnext, the envelope advances to the following ADSR state.
// (The attack special case could be folded into ShiftPhase() some day.)
void EGCalc(FMOperator *op)
{
    op->egstep += 3L << (11 + FM_EGBITS);

    if (op->phase != attack)
    {
        op->eglevel += op->egtransd;
        if (op->eglevel >= op->eglvnext)
            ShiftPhase(op, (EGPhase)(op->phase+1));
        return;
    }

    op->eglevel -= 1 + (op->eglevel >> op->egtransa);
    if (op->eglevel <= 0)
        ShiftPhase(op, decay);
}

// Key an operator on. Does nothing if the key is already down. The envelope
// is restarted (attack, phase counter and stored samples cleared) only when
// the operator is currently off or releasing.
static void KeyOn(FMOperator *op)
{
    if (op->keyon)
        return;
    op->keyon = true;

    if (op->phase != off && op->phase != release)
        return;

    ShiftPhase(op, attack);
    op->out2 = 0;
    op->out = 0;
    op->pgcount = 0;
}

// Key an operator off: if the key is currently down, mark it up and request
// the release state.
static void KeyOff(FMOperator *op)
{
    if (!op->keyon)
        return;

    op->keyon = false;
    ShiftPhase(op, release);
}

// Sine(s): signed sine lookup for the phase-accumulator value s.
// The PG uses 9 bits, with the table itself using another 10 bits. Only the
// top bits of s matter, since the PG increment leaves the lowest few bits
// as noise.
// The table index is taken from the bits starting at
// (20+FM_PGBITS-FM_OPSINBITS) and wrapped to the half-period table. The bit
// selected by the mask 0x10000000 (bit 28) marks the second half of
// [0,2*M_PI]; when it is set the table value is XORed with -2, which yields
// -x for odd x and -x-2 for even x, i.e. an (approximately) negated sample.
// This depends on a two's-complement integer representation.
// The expansion is fully parenthesised so it can be used inside larger
// expressions. Note that s is evaluated twice; pass an expression without
// side effects.
#define Sine(s) (sinetable[((s) >> (20+FM_PGBITS-FM_OPSINBITS))&(FM_OPSINENTS/2-1)]^(-(((s) & 0x10000000) >> 27)))
//#define LogToLin(x) cltab[x]

// Convert an envelope level to its linear amplitude via cltab[].
// Any index outside the table yields 0. The bound is derived from the
// array itself: cltab has 512 entries, and the previous hard-coded limit of
// 1024 allowed indices 512..1023 to read past its end. Negative levels
// arrive here as very large unsigned values and therefore also map to 0.
static inline uint32_t LogToLin(uint32_t x) {
    if (x >= sizeof(cltab) / sizeof(cltab[0])) {
        return 0;
    }
    return cltab[x];
}

// Clock the phase generator of one operator: returns the phase counter as
// it was before this call and advances it by pgdcount.
// Per the original notes the counter covers 2^(20+PGBITS) per cycle, with
// PGBITS=9 in this implementation.
static inline uint32_t PGCalc(FMOperator *op)
{
    const uint32_t before = op->pgcount;

    op->pgcount += op->pgdcount;
    return before;
}

// Phase-generator clock with phase modulation: like PGCalc(), but the
// increment is additionally offset by (pgdcountl * pmv) >> 5, where pmv is
// the file-scope modulation value set by Ch4CalcL().
// Returns the phase counter as it was before the update.
static inline uint32_t PGCalcL(FMOperator *op)
{
    const uint32_t before = op->pgcount;

    op->pgcount += op->pgdcount + ((op->pgdcountl * pmv) >> 5);
    return before;
}

// Clock one FM operator. The sine table is sampled at the current phase,
// offset by the modulator input `in` (shifted left by 7); the phase counter
// is then advanced by pgdcount. The sample scaled by the envelope output is
// stored in op->out and returned.
static inline int32_t Calc(FMOperator *op, int32_t in)
{
    const int32_t wave = Sine(op->pgcount + (in << 7));

    // advance the phase generator (same update as PGCalc())
    op->pgcount += op->pgdcount;

    op->out = op->egout*wave;
    return op->out;
}

// LFO variant of the operator clock, used when the chip-internal LFO is
// enabled: identical to the plain version except that the phase increment
// also includes the vibrato term (pgdcountl * pmv) >> 5.
static inline int32_t CalcL(FMOperator *op, int32_t in)
{
    const int32_t wave = Sine(op->pgcount + (in << 7));

    // advance the phase generator with phase modulation (same update as PGCalcL())
    op->pgcount += op->pgdcount + ((op->pgdcountl * pmv) >> 5);

    op->out = op->egout*wave;
    return op->out;
}

// Clock operator 0. OP0 takes no input from another operator; instead it
// can frequency-modulate itself using the sum of its two previous output
// samples, attenuated by the feedback shift `fb`. With FM_PRECISEFEEDBACK
// enabled, a shift of 31 means "no feedback" and the sine table is sampled
// at the unmodified phase.
#define FM_PRECISEFEEDBACK 1
static inline void CalcFB(FMOperator *op, uint fb)
{
    int32_t wave;
    const int32_t history = op->out + op->out2;

    op->out2 = op->out;

    if (!FM_PRECISEFEEDBACK || fb != 31)
        wave = Sine(op->pgcount + ((history << 6) >> fb));
    else
        wave = Sine(op->pgcount);

    PGCalc(op);
    op->out = op->egout*wave;
}

// LFO variant of the operator-0 clock: self-feedback is handled exactly as
// in the non-LFO version (sum of the two previous outputs, shifted by `fb`,
// with 31 meaning "no feedback"), but the phase generator is advanced
// through PGCalcL() so the vibrato term is included.
static inline void CalcFBL(FMOperator *op, uint fb)
{
    int32_t wave;
    const int32_t history = op->out + op->out2;

    op->out2 = op->out;

    if (!FM_PRECISEFEEDBACK || fb != 31)
        wave = Sine(op->pgcount + ((history << 6) >> fb));
    else
        wave = Sine(op->pgcount);

    PGCalcL(op);
    op->out = op->egout*wave;
}

// ---------------------------------------------------------------------------
//  4-op Channel
//  Select the "algorithm", i.e. how the operators of a channel are wired
//  together (page 22 of the manual shows all algorithms supported by the
//  OPNA). Each table row holds three (input, output) buffer-index pairs:
//  the inputs land in idx[0..2] and the outputs in idx[3..5].
//  The stored feedback samples of operator 0 are cleared as well.
//  `algo` is used as a table index without a range check, so it must be 0..7.
//
static void SetAlgorithm(Channel4 *ch4, uint algo)
{
    static const uint8_t table1[8][6] =
    {
        { 0, 1, 1, 2, 2, 3 },   { 1, 0, 0, 1, 1, 2 },
        { 1, 1, 1, 0, 0, 2 },   { 0, 1, 2, 1, 1, 2 },
        { 0, 1, 2, 2, 2, 1 },   { 0, 1, 0, 1, 0, 1 },
        { 0, 1, 2, 1, 2, 1 },   { 1, 0, 1, 0, 1, 0 },
    };
    const uint8_t *row = table1[algo];
    int k;

    for (k = 0; k < 3; k++) {
        ch4->idx[k]     = row[2*k];     // in[k]
        ch4->idx[k + 3] = row[2*k + 1]; // out[k]
    }

    ch4->op[0].out = 0;
    ch4->op[0].out2 = 0;
}

// Initialise a 4-operator channel: every operator is initialised, algorithm
// 0 is selected and the phase-modulation row is pointed at pmtable[0].
static inline void Ch4Init(Channel4 *ch4)
{
    OperatorInit(&ch4->op[0]);
    OperatorInit(&ch4->op[1]);
    OperatorInit(&ch4->op[2]);
    OperatorInit(&ch4->op[3]);

    SetAlgorithm(ch4, 0);
    ch4->pms = pmtable[0];
}

// Prepare a channel for rendering. Every operator is run through
// OperatorPrepare() (which only does work when its paramchanged flag is
// set) and the channel's phase-modulation row is selected from operator 0's
// ms setting.
// Return value:
//   0      - all four operators are muted
//   bit 0  - set when IsOn() reports at least one operator as active
//   bit 1  - set when the LFO affects this channel: operator 0's ms has a
//            bit in 0x37 while any operator has AM enabled, or a bit in
//            0x07 otherwise.
static inline int Ch4Prepare(Channel4 *ch4)
{
    int n;
    int all_muted = 1;
    int any_amon = 0;

    for (n = 0; n < 4; n++)
        OperatorPrepare(&ch4->op[n]);

    ch4->pms = pmtable[ch4->op[0].ms & 7];

    for (n = 0; n < 4; n++) {
        if (!ch4->op[n].mute)
            all_muted = 0;
        any_amon |= ch4->op[n].amon;
    }
    if (all_muted)
        return 0;

    int result = (IsOn(&ch4->op[0]) | IsOn(&ch4->op[1]) | IsOn(&ch4->op[2]) | IsOn(&ch4->op[3])) ? 1 : 0;

    const int lfo_mask = any_amon ? 0x37 : 7;
    if (ch4->op[0].ms & lfo_mask)
        result |= 2;

    return result;
}

// Render one sample of a channel without LFO.
// First all four envelope generators are clocked and the envelope outputs
// refreshed. They are independent of each other, so hardware could clock
// them in parallel; the only requirement is that they finish before any of
// the operator calculations below run. The operators are then evaluated in
// order 0..3, exchanging samples through buf[] as routed by idx[].
// The result is the selected buffer plus operator 3's *previous* output,
// scaled down by 8 bits.
//
int32_t Ch4Calc(Channel4 *ch4)
{
    int n;

    for (n = 0; n < 4; n++) {
        FMOperator *op = &ch4->op[n];

        op->egstep -= op->egstepd;
        if (op->egstep < 0)
            EGCalc(op);
        op->egout = LogToLin(op->eglevel)*gaintab[op->tl];
    }

    ch4->buf[3] = 0;
    ch4->buf[2] = 0;
    ch4->buf[1] = 0;

    // buf[0] carries operator 0's output from the previous sample
    ch4->buf[0] = ch4->op[0].out;
    CalcFB(&ch4->op[0], ch4->fb);

    ch4->buf[ch4->idx[3]] += Calc(&ch4->op[1], ch4->buf[ch4->idx[0]]);
    ch4->buf[ch4->idx[4]] += Calc(&ch4->op[2], ch4->buf[ch4->idx[1]]);

    int prev3 = ch4->op[3].out;
    Calc(&ch4->op[3], ch4->buf[ch4->idx[2]]);

    return ((ch4->buf[ch4->idx[5]] + prev3) >> 8);
}

// Render one sample of a channel with the chip-internal LFO applied.
// Same structure as the non-LFO renderer, with two additions: pmv is loaded
// from the channel's phase-modulation row (vibrato, consumed by PGCalcL()),
// and each operator's envelope level is offset by aml >> amt[ams] (tremolo).
// The original author notes that this path is essentially untested, because
// material that uses the internal LFO - let alone a good recording of it
// played on the chip itself - is hard to find. Merging this with the
// non-LFO renderer behind a flag is a possible future clean-up.
//
int32_t Ch4CalcL(Channel4 *ch4)
{
    int n;

    pmv = ch4->pms[pml];

    for (n = 0; n < 4; n++) {
        FMOperator *op = &ch4->op[n];

        op->egstep -= op->egstepd;
        if (op->egstep < 0)
            EGCalc(op);
        op->egout = (LogToLin(op->eglevel + (aml >> amt[op->ams]))*gaintab[op->tl]);
    }

    ch4->buf[3] = 0;
    ch4->buf[2] = 0;
    ch4->buf[1] = 0;

    // buf[0] carries operator 0's output from the previous sample
    ch4->buf[0] = ch4->op[0].out;
    CalcFBL(&ch4->op[0], ch4->fb);

    ch4->buf[ch4->idx[3]] += CalcL(&ch4->op[1], ch4->buf[ch4->idx[0]]);
    ch4->buf[ch4->idx[4]] += CalcL(&ch4->op[2], ch4->buf[ch4->idx[1]]);

    int prev3 = ch4->op[3].out;
    CalcL(&ch4->op[3], ch4->buf[ch4->idx[2]]);

    return ((ch4->buf[ch4->idx[5]] + prev3) >> 8);
}

// Apply prescaler setting `p` (0..2) and rebuild everything derived from the
// chip clock and the output samplerate. Nothing happens when `p` equals the
// current setting.
// psgrate is always the user-requested samplerate. Without interpolation,
// rate equals psgrate; with interpolation, rate starts at the FM clock and
// is halved until psgrate*16384/rate exceeds 8192, and the resulting ratio
// is kept in mpratio for the downsampler.
// Also refreshes timer_step, the time tables, the PSG clock and lfotab[].
// TODO: better-quality resampling may be of use here, possibly.
//
static void SetPrescaler(OPNA *opna, uint32_t p)
{
    static const char table[3][2] = { { 6, 4 }, { 3, 2 }, { 2, 1 } };
    static const uint8_t table2[8] = { 109,  78,  72,  68,  63,  45,  9,  6 };
    uint32_t n, fmclock, ratio;

    if (opna->prescale == p)
        return;
    opna->prescale = p;

    fmclock = opna->clock / table[p][0] / 12;

    if (!opna->interpolation) {
        opna->rate = opna->psgrate;
    } else {
        opna->rate = fmclock * 2;
        do {
            opna->rate >>= 1;
            opna->mpratio = opna->psgrate * 16384 / opna->rate;
        } while (opna->mpratio <= 8192);
    }

    // FM clocks per output sample, rounded, in FM_RATIOBITS fixed point
    ratio = ((fmclock << FM_RATIOBITS) + opna->rate/2) / opna->rate;
    // microseconds per FM clock, in 16.16 fixed point
    opna->timer_step = (int32_t)(1000000.0f * 65536.0f/fmclock);
    MakeTimeTable(ratio);
    PSGSetClock(&opna->psg, opna->clock / table[p][1], opna->psgrate);

    for (n = 0; n < 8; n++) {
        lfotab[n] = (ratio << (1+FM_LFOCBITS-FM_RATIOBITS)) / table2[n];
    }
}

// Force SetPrescaler() to redo its work for the current prescaler value.
// SetPrescaler() returns early when the value is unchanged, so the stored
// value is first replaced by -1, which never equals a real setting.
static inline void RebuildTimeTable(OPNA *opna)
{
    const int saved = opna->prescale;

    opna->prescale = -1;
    SetPrescaler(opna, saved);
}

// Set the FM output volume. `db` is capped at 20 and converted to the
// internal linear scale as 16384 * 10^(db/40), rounded to the nearest
// integer. Values of -192 or below mute the FM part (volume 0).
//
void SetVolumeFM(OPNA *opna, int db)
{
    db = Min(db, 20);

    if (db <= -192) {
        opna->fmvolume = 0;
        return;
    }
    opna->fmvolume = lrintf(16384.0f * expf((float)M_LN10*(db / 40.0f)));
}

// Chip-internal Timer A handler implementing CSM mode: when bit 7 of regtc
// is set, all four operators of the CSM channel are keyed on and then keyed
// off each time Timer A fires.
// Background from the original author: CSM was apparently intended for
// primitive formant synthesis (later reworked by Yamaha into the far more
// elaborate FS1R), saw next to no use, and was removed from the YMF288/OPN3.
// NOTE(review): every operator is keyed on and then immediately keyed off -
// confirm that this ordering is intended.
//
static void TimerA(OPNA *opna)
{
    int n;

    if (!(opna->regtc & 0x80))
        return;

    for (n = 0; n < 4; n++)
        KeyOn(&opna->csmch->op[n]);
    for (n = 0; n < 4; n++)
        KeyOff(&opna->csmch->op[n]);
}

// ---------------------------------------------------------------------------
// Advance both chip timers by `us` microseconds. Returns 1 if at least one
// timer expired during this call, 0 otherwise.
// A timer only runs while its counter is non-zero. Timer A counts in
// microseconds << 16 and Timer B in microseconds << 12, so Timer B is 16
// times coarser. On expiry the counter is reloaded by adding the period
// until it is positive again, and the matching status bit (bit 0 for A,
// bit 1 for B) is raised if enabled in regtc (bit 2 / bit 3). Timer A
// expiry also runs the CSM handler.
// Background from the original author (assuming the standard 8MHz chip
// clock): Timer A has a resolution of 9 microseconds and on the PC-9801
// Speak Board is only used for sound effects; Timer B has a resolution of
// 144 microseconds and effectively serves as the main music clock.
//
uint8_t OPNATimerCount(OPNA *opna, int32_t us)
{
    uint8_t fired = 0;

    if (opna->timera_count) {
        opna->timera_count -= us << 16;
        if (opna->timera_count <= 0) {
            fired = 1;
            TimerA(opna);

            do {
                opna->timera_count += opna->timera;
            } while (opna->timera_count <= 0);

            if (opna->regtc & 4)
                opna->status |= 1;
        }
    }

    if (opna->timerb_count) {
        opna->timerb_count -= us << 12;
        if (opna->timerb_count <= 0) {
            fired = 1;

            do {
                opna->timerb_count += opna->timerb;
            } while (opna->timerb_count <= 0);

            if (opna->regtc & 8)
                opna->status |= 2;
        }
    }

    return fired;
}

// Rhythm source samples, one blob per rhythm channel. Per the original
// author the sample format is pcm_s8 (*not* u8!). The data lives in
// rhythmdata.h, which is included by rhythmdata.c so that the object file
// built from this file stays at a reasonable size for
// debugging/testing/sanity purposes.
//
extern const unsigned char* rhythmdata[6];
// Length in bytes of each rhythmdata[] blob. OPNAInit() skips the first 44
// bytes of every blob and halves the remaining length when computing the
// sample size.
static const unsigned int rhythmdatalen[6] = {
    9013, 10674, 66610, 7259, 18562, 3042 
};

// ---------------------------------------------------------------------------
// Main chip init routine.
//   c      - chip clock; should never be anything other than 8MHz. It is
//            halved before being stored in opna->clock.
//   r      - output samplerate, typically 44100.
//   ipflag - if true, render at the chip-internal rate and downsample to r
//            (see SetPrescaler() for how that rate is chosen).
// Returns true on success, false if the samplerate could not be applied.
//
uint8_t OPNAInit(OPNA *opna, uint c, uint r, uint8_t ipflag)
{
    int i;

    opna->devmask = 0x7;
    opna->prescale = 0;
    opna->rate = 44100;
    opna->mixl = 0;
    opna->mixdelta = 16383;
    opna->interpolation = false;

    MakeTable();
    for (i=0; i<6; i++) {
        Ch4Init(&opna->ch[i]);
        opna->rhythm[i].sample = 0;
        opna->rhythm[i].pos = 0;
        opna->rhythm[i].size = 0;
        opna->rhythm[i].volume = 0;
    }
    opna->rhythmtvol = 0;
    opna->csmch = &opna->ch[2];

    // Attach the built-in rhythm samples. The first 44 bytes of each blob
    // are skipped and the remaining byte count is halved; positions and
    // sizes are kept in 1/1024ths of a sample. pos starts at the end of the
    // sample (pos == size).
    for (i=0; i<6; i++)
    {
        uint8_t *file_buf = (uint8_t*)rhythmdata[i] + 44;
        uint32_t fsize = (rhythmdatalen[i] - 44) / 2;

        opna->rhythm[i].sample = (int8_t*)file_buf;
        opna->rhythm[i].rate = 44100;
        opna->rhythm[i].step = opna->rhythm[i].rate * 1024 / opna->rate;
        opna->rhythm[i].pos = opna->rhythm[i].size = fsize * 1024;
    }

    c /= 2;
    opna->clock = c;
    if (!OPNASetRate(opna, r, ipflag))
        return false;
    RebuildTimeTable(opna);
    OPNAReset(opna);
    PSGInit(&opna->psg);

    SetVolumeFM(opna, 0);
    SetVolumePSG(&opna->psg, 0);
    OPNASetChannelMask(opna, ~0);
    // Reset the volume of every rhythm channel. The loop used to pass 0 as
    // the channel on each iteration, so only channel 0 was ever addressed.
    // NOTE(review): assumes the signature is
    // SetVolumeRhythm(opna, index, db) - confirm against its prototype.
    for (i=0; i<6; i++)
        SetVolumeRhythm(opna, i, 0);
    return true;
}

// ---------------------------------------------------------------------------
// Reset the chip: status, prescaler, timers, PSG and LFO counter are
// cleared, the listed register ranges are written with 0 (in the order
// given in the table), every channel is panned to 3 and every operator is
// reset.
//
void OPNAReset(OPNA *opna)
{
    // Register ranges [first, end) that are zeroed, in write order.
    static const struct { int first, end; } cleared[] = {
        { 0x20,  0x28  },
        { 0x30,  0xc0  },
        { 0x130, 0x1c0 },
        { 0x100, 0x110 },
        { 0x10,  0x20  },
    };
    unsigned int k;
    int reg, chn, opn;

    opna->status = 0;
    SetPrescaler(opna, 0);
    opna->timera_count = 0;
    opna->timerb_count = 0;
    PSGReset(&opna->psg);
    opna->reg29 = 0x1f;
    opna->rhythmkey = 0;

    for (k = 0; k < sizeof cleared / sizeof cleared[0]; k++) {
        for (reg = cleared[k].first; reg < cleared[k].end; reg++)
            OPNASetReg(opna, reg, 0);
    }

    for (chn = 0; chn < 6; chn++)
    {
        opna->pan[chn] = 3;
        for (opn = 0; opn < 4; opn++)
            OperatorReset(&opna->ch[chn].op[opn]);
    }

    opna->statusnext = 0;
    opna->lfocount = 0;
    opna->status = 0;
}

// ---------------------------------------------------------------------------
// Change the OPNA "DAC" samplerate.
//   r      - requested output samplerate in Hz; must be non-zero.
//   ipflag - if true, render at the chip-internal rate and downsample to r.
// Rebuilds the rate-dependent tables, reloads the LFO increment from reg22,
// flags every operator for re-preparation and rescales the rhythm sample
// steps.
// Returns true on success. A rate of 0 is rejected with false before any
// state is modified: it would otherwise divide by zero, both in the rhythm
// step computation below and in SetPrescaler().
//
uint8_t OPNASetRate(OPNA *opna, uint r, uint8_t ipflag)
{
    int i, j;

    if (r == 0)
        return false;

    opna->interpolation = ipflag;
    opna->psgrate = r;
    RebuildTimeTable(opna);
    // bit 3 of reg22 enables the LFO, bits 0..2 select its frequency
    opna->lfodcount = opna->reg22 & 0x08 ? lfotab[opna->reg22 & 7] : 0;

    for (i=0; i<6; i++) {
        for (j=0; j<4; j++)
            opna->ch[i].op[j].paramchanged = true;
    }
    for (i=0; i<6; i++) {
        opna->rhythm[i].step = opna->rhythm[i].rate * 1024 / r;
    }
    return true;
}

// ---------------------------------------------------------------------------
// Set the OPNA channel mask. Bits 0..5 of `mask` control the six FM
// channels: 1 enables the channel, 0 mutes all four of its operators.
// The bits above those (mask >> 6) are handed to PSGSetChannelMask(), which
// per the original notes behaves the same way (0 disables, 1 enables).
// Every operator is flagged for re-preparation.
//
void OPNASetChannelMask(OPNA *opna, uint mask)
{
    int chn, opn;

    for (chn = 0; chn < 6; chn++) {
        const int muted = !(mask & (1 << chn));

        for (opn = 0; opn < 4; opn++) {
            opna->ch[chn].op[opn].mute = muted;
            opna->ch[chn].op[opn].paramchanged = true;
        }
    }
    PSGSetChannelMask(&opna->psg, (mask >> 6));
}

// ---------------------------------------------------------------------------
// Main OPNA register-set routine. Really long and boring switch-case.
// Basically taken directly from the manual - the only parts of the spec
// that were even the least bit tricky to implement were the f-number tables,
// everything else is basically obvious.
//
// Parameters:
//   opna - chip state to update.
//   addr - register address. Addresses with bit 0x100 set select the second
//          register bank, which this routine maps onto channels 3-5.
//   data - value to write.
// Addresses that match none of the cases below (and are not operator
// parameter registers handled in the default case) are silently ignored.
//
void OPNASetReg(OPNA *opna, uint addr, uint data)
{
    uint j, _dp = 0;
    // Channel index within a bank, taken from the low two address bits.
    // The 0x1xx cases add 3 to reach channels 3-5.
    int c = addr & 3;
    switch (addr)
    {
        // Scratch variables shared by several cases; declared here so they
        // are in scope for the whole switch body.
        uint modified;
        uint tmp;

    // Timer -----------------------------------------------------------------
        // Timer A period: regta[0] supplies the upper bits (shifted left by
        // 2) and the low two bits of regta[1] the rest, giving a 10-bit value.
        case 0x24: case 0x25:
            opna->regta[addr & 1] = (uint8_t)data;
            tmp = (opna->regta[0] << 2) + (opna->regta[1] & 3);
            opna->timera = (1024-tmp) * opna->timer_step;
            break;

        // Timer B period, derived from an 8-bit value.
        case 0x26:
            opna->timerb = (256-data) * opna->timer_step;
            break;

        // Timer control. tmp holds the bits that changed relative to the
        // previous write, so counters are only (re)loaded or cleared when
        // their enable bit actually toggles.
        case 0x27:
            tmp = opna->regtc ^ data;
            opna->regtc = (uint8_t)data;
            if (data & 0x10)
                opna->status &= ~1;     // clear status bit 0
            if (data & 0x20)
                opna->status &= ~2;     // clear status bit 1
            if (tmp & 0x01)
                opna->timera_count = (data & 1) ? opna->timera : 0;
            if (tmp & 0x02)
                opna->timerb_count = (data & 2) ? opna->timerb : 0;
            break;

    // Misc ------------------------------------------------------------------
    case 0x28:      // Key On/Off
        // The low two bits select a channel within a bank (value 3 is
        // ignored), bit 2 selects the second bank, and the upper nibble
        // carries one key bit per operator.
        if ((data & 3) < 3)
        {
            uint32_t key = (data >> 4);
            c = (data & 3) + (data & 4 ? 3 : 0);
            if (key & 0x1) KeyOn(&opna->ch[c].op[0]); else KeyOff(&opna->ch[c].op[0]);
            if (key & 0x2) KeyOn(&opna->ch[c].op[1]); else KeyOff(&opna->ch[c].op[1]);
            if (key & 0x4) KeyOn(&opna->ch[c].op[2]); else KeyOff(&opna->ch[c].op[2]);
            if (key & 0x8) KeyOn(&opna->ch[c].op[3]); else KeyOff(&opna->ch[c].op[3]);
        }
        break;

    // Status Mask -----------------------------------------------------------
    // Stored verbatim; FMMix() tests bit 7 to decide whether channels 3-5
    // are prepared and mixed.
    case 0x29:
        opna->reg29 = data;
        break;

    // Prescaler -------------------------------------------------------------
    // The written data is unused; only the address (0, 1 or 2 after
    // subtracting 0x2d) is passed on.
    case 0x2d: case 0x2e: case 0x2f:
        SetPrescaler(opna, (addr-0x2d));
        break;

    // F-Number --------------------------------------------------------------
    case 0x1a0: case 0x1a1: case 0x1a2:
        c += 3;
        /* fallthrough */
    case 0xa0:  case 0xa1: case 0xa2:
        // Combine the low byte written here with the high byte previously
        // latched in fnum2[] (registers 0xa4-0xa6).
        opna->fnum[c] = data + opna->fnum2[c] * 0x100;
        // Low 11 bits shifted left by the 3-bit field in bits 11-13.
        _dp = (opna->fnum[c] & 2047) << ((opna->fnum[c] >> 11) & 7);
        for(j=0; j<4; j++) {
            opna->ch[c].op[j].dp = _dp;
            opna->ch[c].op[j].bn = notetab[(opna->fnum[c] >> 7) & 127];
            opna->ch[c].op[j].paramchanged = true;
        }
        break;

    case 0x1a4: case 0x1a5: case 0x1a6:
        c += 3;
        /* fallthrough */
    case 0xa4 : case 0xa5: case 0xa6:
        // Only latched here; takes effect on the next 0xa0-0xa2 write.
        opna->fnum2[c] = (uint8_t)data;
        break;

    // Second set of F-numbers (fnum3[]), with its high bytes latched in
    // fnum2[6..8]. FMMix() applies these per operator to the csmch channel
    // when (regtc & 0xc0) is non-zero.
    case 0xa8:  case 0xa9: case 0xaa:
        opna->fnum3[c] = data + opna->fnum2[c+6] * 0x100;
        break;

    case 0xac : case 0xad: case 0xae:
        opna->fnum2[c+6] = (uint8_t)data;
        break;

    // Algorithm -------------------------------------------------------------
    case 0x1b0: case 0x1b1:  case 0x1b2:
        c += 3;
        /* fallthrough */
    case 0xb0:  case 0xb1:  case 0xb2:
        // Bits 3-5 index fbtab for the feedback value; bits 0-2 select the
        // algorithm.
        opna->ch[c].fb = fbtab[((data >> 3) & 7)];
        SetAlgorithm(&opna->ch[c], data & 7);
        break;

    case 0x1b4: case 0x1b5: case 0x1b6:
        c += 3;
        /* fallthrough */
    case 0xb4: case 0xb5: case 0xb6:
        // Bits 6-7 are the pan setting; the whole byte is also stored in
        // each operator's ms field.
        opna->pan[c] = (data >> 6) & 3;
        for(j=0; j<4; j++) {
            opna->ch[c].op[j].ms = data;
            opna->ch[c].op[j].paramchanged = true;
        }
        break;

    // Rhythm ----------------------------------------------------------------
    case 0x10:          // DM/KEYON
        if (!(data & 0x80))  // KEY ON
        {
            // Mark the selected voices as playing and rewind each of them.
            opna->rhythmkey |= data & 0x3f;
            if (data & 0x01) opna->rhythm[0].pos = 0;
            if (data & 0x02) opna->rhythm[1].pos = 0;
            if (data & 0x04) opna->rhythm[2].pos = 0;
            if (data & 0x08) opna->rhythm[3].pos = 0;
            if (data & 0x10) opna->rhythm[4].pos = 0;
            if (data & 0x20) opna->rhythm[5].pos = 0;
        }
        else
        {                   // DUMP
            // Bit 7 set: clear the key bits of the selected voices.
            opna->rhythmkey &= ~data;
        }
        break;

    // Rhythm total level: the low 6 bits are stored inverted.
    case 0x11:
        opna->rhythmtl = ~data & 63;
        break;

    // Per-voice pan (bits 6-7) and level (low 5 bits, stored inverted).
    // addr & 7 yields the voice index 0..5.
    case 0x18:      // Bass Drum
    case 0x19:      // Snare Drum
    case 0x1a:      // Top Cymbal
    case 0x1b:      // Hihat
    case 0x1c:      // Tom-tom
    case 0x1d:      // Rim shot
        opna->rhythm[addr & 7].pan   = (data >> 6) & 3;
        opna->rhythm[addr & 7].level = ~data & 31;
        break;

    // LFO -------------------------------------------------------------------
    case 0x22:
        // Restart the LFO counter whenever the enable bit (bit 3) toggles;
        // the step comes from lfotab while enabled and is 0 otherwise.
        modified = opna->reg22 ^ data;
        opna->reg22 = data;
        if (modified & 0x8)
            opna->lfocount = 0;
        opna->lfodcount = opna->reg22 & 8 ? lfotab[opna->reg22 & 7] : 0;
        break;

    // PSG -------------------------------------------------------------------
    // Registers 0-15 are forwarded untouched to the PSG.
    case  0: case  1: case  2: case  3: case  4: case  5: case  6: case  7:
    case  8: case  9: case 10: case 11: case 12: case 13: case 14: case 15:
        PSGSetReg(&opna->psg, addr, data);
        break;

    // ADSR ------------------------------------------------------------------
    // Per-operator parameters. Bits 0-1 of the address select the channel
    // (3 is ignored), bits 2-3 the slot, and bits 4-7 the parameter group.
    default:
        if (c < 3)
        {
            if (addr & 0x100)
                c += 3;
            {
                // Maps the slot number encoded in the address to the
                // index into op[]; slots 1 and 2 are swapped.
                uint8_t slottable[4] = { 0, 2, 1, 3 };
                uint32_t slot = slottable[(addr >> 2) & 3];
                FMOperator* op = &opna->ch[c].op[slot];
        
                switch ((addr >> 4) & 15)
                {
                case 3: // 30-3E DT/MULTI
                    op->detune = (((data >> 4) & 0x07) * 0x20);
                    op->multiple = (data & 0x0f);
                    op->paramchanged = 1;
                    break;
        
                case 4: // 40-4E TL
                    // While regtc bit 7 is set and this channel is csmch,
                    // only the latched copy (tll) is updated; tl itself is
                    // left untouched.
                    if(!((opna->regtc & 0x80) && (opna->csmch == &opna->ch[c]))) {
                        op->tl = (data & 0x7f);
                        op->paramchanged = 1;
                    }
                    op->tll = (data & 0x7f);
                    break;
        
                case 5: // 50-5E KS/AR
                    op->ks = ((data >> 6) & 3);
                    op->ar = ((data & 0x1f) * 2);
                    op->paramchanged = 1;
                    break;
        
                case 6: // 60-6E DR/AMON
                    op->dr = ((data & 0x1f) * 2);
                    op->amon = ((data & 0x80) != 0);
                    op->paramchanged = 1;
                    break;
        
                case 7: // 70-7E SR
                    op->sr = ((data & 0x1f) * 2);
                    op->paramchanged = 1;
                    break;
        
                case 8: // 80-8E SL/RR
                    op->sl = (((data >> 4) & 15) * 4); 
                    op->rr = ((data & 0x0f) * 4 + 2);
                    op->paramchanged = 1;
                    break;
        
                case 9: // 90-9E SSG-EC
                    // Note: unlike the other groups this does not set
                    // paramchanged.
                    op->ssgtype = (data & 0x0f);
                    break;
                }
            }
        }
        break;
    }
}

// ---------------------------------------------------------------------------
// Read OPNA register. Pointless. Only SSG registers can be read, and of those
// the only one anyone seems to be interested in reading is register 7,
// which as I explain in detail in psg.c, is completely superfluous.
//
// Addresses below 0x10 are read back from the PSG; address 0xff reads as 1;
// every other address reads as 0.
uint OPNAGetReg(OPNA *opna, uint addr)
{
    if (addr >= 0x10)
    {
        return (addr == 0xff) ? 1 : 0;
    }
    return PSGGetReg(&opna->psg, addr);
}

// ---------------------------------------------------------------------------

// Sums the outputs of the enabled FM channels using Ch4CalcL(). activech
// carries two bits per channel; bit 2*n enables channel n. Channel 0
// overwrites *dest, the remaining channels accumulate into it, and *dest is
// left untouched when no enabled bit is set.
static inline void MixSubSL(Channel4 ch[6], int activech, int32_t *dest)
{
    int n;

    for (n = 0; n < 6; n++)
    {
        if (!(activech & (1 << (2 * n))))
            continue;

        if (n == 0)
            *dest = Ch4CalcL(&ch[n]);
        else
            *dest += Ch4CalcL(&ch[n]);
    }
}

// Same as MixSubSL() but uses Ch4Calc() for every channel. activech carries
// two bits per channel; bit 2*n enables channel n. Channel 0 overwrites
// *dest, the remaining channels accumulate into it.
static inline void MixSubS(Channel4 ch[6], int activech, int32_t *dest)
{
    int n;

    for (n = 0; n < 6; n++)
    {
        if (!(activech & (1 << (2 * n))))
            continue;

        if (n == 0)
            *dest = Ch4Calc(&ch[n]);
        else
            *dest += Ch4Calc(&ch[n]);
    }
}

// ---------------------------------------------------------------------------
// Mix FM channels and output. Mix6 runs at the user-specified samplerate;
// Mix6I runs at the chip samplerate of 55466Hz and then downsamples
// to the user-specified samplerate. Whether one of these sounds better
// than the other is an open question.
//
// Converts a raw mixed FM accumulator value s into an output sample: s is
// shifted right by 2, passed through Limit() with the bounds 0xffff and
// -0xffff (presumably a clamp to that range -- confirm against Limit()'s
// definition), multiplied by opna->fmvolume and shifted right by 14.
// The expansion refers to a variable named `opna`, so one must be in scope
// wherever the macro is used.
#define IStoSample(s)   ((Limit((s) >> 2, 0xffff, -0xffff) * opna->fmvolume) >> 14)
//#define IStoSample(s) ((((s) >> 3) * fmvolume) >> 14)

// Generates one FM sample per output sample and adds it to buffer.
// When any odd bit of activech (mask 0xaaa) is set, the LFO is stepped and
// the Ch4CalcL-based mixer is used; otherwise the plain mixer is used.
static void Mix6(OPNA *opna, Sample* buffer, uint32_t nsamples, int activech)
{
    const int use_lfo = activech & 0xaaa;
    uint32_t n;

    for (n = 0; n < nsamples; n++)
    {
        int32_t acc = 0;

        if (use_lfo)
        {
            LFO(opna);
            MixSubSL(opna->ch, activech, &acc);
        }
        else
        {
            MixSubS(opna->ch, activech, &acc);
        }

        // Output is accumulated so other generators can share the buffer.
        buffer[n] += IStoSample(acc);
    }
}

// ---------------------------------------------------------------------------
// See the comment above Mix6().
//
// Generates FM output at the chip's internal rate and converts it to the
// output rate, adding the result to buffer. opna->mpratio is the rate ratio
// in 1/16384 units and opna->mixdelta carries the fractional position
// between calls.
//   mpratio <  16384: several chip samples are averaged (weighted by their
//                     overlap) into each output sample.
//   mpratio >= 16384: output samples are linearly interpolated between the
//                     two most recent chip samples (mixl and mixl1).
static void Mix6I(OPNA *opna, Sample* buffer, uint32_t nsamples, int activech)
{
    // Mix
    int32_t ibuf;

    int32_t delta = opna->mixdelta;
    Sample* limit = buffer + nsamples;
    Sample *dest;
    if (opna->mpratio < 16384)
    {
        for (dest = buffer; dest < limit; dest+=1)
        {
            // Initialised so that the carry-over below is well defined even
            // if the loop body never runs (delta <= 0 on entry, e.g. after
            // the other branch left a negative mixdelta). Previously these
            // were read uninitialised in that case.
            int32_t l = 0, d = 0;
            while (delta > 0)
            {
                ibuf = 0;
                if (activech & 0xaaa)
                    LFO(opna), MixSubSL(opna->ch, activech, &ibuf);
                else
                    MixSubS(opna->ch, activech, &ibuf);

                l = IStoSample(ibuf);
                // Weight of this chip sample inside the current output
                // sample; the last one may only partially overlap.
                d = Min(opna->mpratio, delta);
                opna->mixl += l * d;
                delta -= opna->mpratio;
            }
            dest[0] += (opna->mixl >> 14);
            // The remainder of the last chip sample belongs to the next
            // output sample.
            opna->mixl = l * (16384-d);
            delta += 16384;
        }
    } else {
        // Output-sample advance expressed in chip-sample 1/16384 units.
        int impr = 16384 * 16384 / opna->mpratio;
        for (dest = buffer; dest < limit; dest+=1)
        {
            if (delta < 0)
            {
                // Crossed into the next chip sample: shift the window.
                delta += 16384;
                opna->mixl = opna->mixl1;

                ibuf = 0;
                if (activech & 0xaaa)
                    LFO(opna), MixSubSL(opna->ch, activech, &ibuf);
                else
                    MixSubS(opna->ch, activech, &ibuf);

                opna->mixl1 = IStoSample(ibuf);
            }
            int32_t l = (delta * opna->mixl + (16384 - delta) * opna->mixl1) / 16384;
            dest[0] += l;
            delta -= impr;
        }
    }
    opna->mixdelta = delta;
}

// ---------------------------------------------------------------------------
// Main FM output routine. Clocks all of the operators on the chip, then mixes
// together the output using one of Mix6() or Mix6I() above, and then outputs
// the result to OPNAMix, which is what the calling routine will actually use.
// buffer should be a pointer to a buffer of type Sample (int32_t in this
// implementation, though another used float and in principle int16_t *should*
// be sufficient), and be of size at least equal to nsamples.
//
// Does nothing unless opna->fmvolume is positive. Otherwise it refreshes the
// F-number of the csmch channel, prepares channels 0-2 (and 3-5 when bit 7
// of reg29 is set), and mixes the active channels into buffer.
static void FMMix(OPNA *opna, Sample* buffer, uint32_t nsamples)
{
    uint k;
    uint nprepared;
    int active = 0;

    if (!(opna->fmvolume > 0))
        return;

    // Set F-Number
    if (opna->regtc & 0xc0)
    {
        // Each operator of csmch gets its own F-number.
        SetFNum(&opna->csmch->op[0], opna->fnum3[1]);
        SetFNum(&opna->csmch->op[1], opna->fnum3[2]);
        SetFNum(&opna->csmch->op[2], opna->fnum3[0]);
        SetFNum(&opna->csmch->op[3], opna->fnum[2]);
    }
    else
    {
        // All four operators share the channel's regular F-number.
        int csmidx = (int)(opna->csmch - opna->ch);
        uint phase = (opna->fnum[csmidx] & 2047) << ((opna->fnum[csmidx] >> 11) & 7);

        for (k = 0; k < 4; k++)
        {
            opna->csmch->op[k].dp = phase;
            opna->csmch->op[k].bn = notetab[(opna->fnum[csmidx] >> 7) & 127];
            opna->csmch->op[k].paramchanged = true;
        }
    }

    // Each channel contributes two bits to the activity mask; channels 3-5
    // are only prepared when bit 7 of reg29 is set.
    nprepared = (opna->reg29 & 0x80) ? 6 : 3;
    for (k = 0; k < nprepared; k++)
        active |= Ch4Prepare(&opna->ch[k]) << (2 * k);

    // With the LFO disabled, drop the odd bit of every channel.
    if (!(opna->reg22 & 0x08))
        active &= 0x555;

    if (active & 0x555)
    {
        if (opna->interpolation)
            Mix6I(opna, buffer, nsamples, active);
        else
            Mix6(opna, buffer, nsamples, active);
    }
    else
    {
        // Nothing audible: reset the resampler state.
        opna->mixl = 0;
        opna->mixdelta = 16383;
    }
}

// ---------------------------------------------------------------------------
// Mix Rhythm generator output. Boring, just takes the PCM samples,
// multiplies them by the volume set for that rhythm sample, and then outputs
// the appropriate length of sample for that given samplerate to buffer.
// The same restrictions on buffer as in FMMix() above apply.
//
// Adds every keyed-on rhythm voice to buffer. Nothing is mixed when
// rhythmtvol is 128 or more, when rhythm[0] has no sample data, or when no
// voice is keyed on. A voice stops contributing once its position reaches
// its size.
static void RhythmMix(OPNA *opna, Sample* buffer, uint32_t count)
{
    int voice;

    if (opna->rhythmtvol >= 128 || !opna->rhythm[0].sample || !(opna->rhythmkey & 0x3f))
        return;

    for (voice = 0; voice < 6; voice++)
    {
        Rhythm *r = &opna->rhythm[voice];
        Sample *out = buffer;
        Sample *end = buffer + count;
        int db, vol;

        if (!(opna->rhythmkey & (1 << voice)) || r->level < 0)
            continue;

        // Combined attenuation, limited to the range covered by tltab.
        db = Limit(opna->rhythmtl+r->level+r->volume, 95, -31);
        vol = tltab[FM_TLPOS + db];

        // pos is kept in 1/1024ths of a source sample.
        while (out < end && r->pos < r->size)
        {
            int sample = ((r->sample[r->pos / 1024] << 8) * vol) >> 12;
            r->pos += r->step;
            *out += sample;
            out++;
        }
    }
}

// ---------------------------------------------------------------------------
// Main OPNA output routine. See FMMix(), RhythmMix() above and PSGMix()
// in psg.c for details.
//
// Each enabled generator adds its output to buffer. devmask bit 0 enables
// FM, bit 1 the PSG and bit 2 the rhythm generator.
void OPNAMix(OPNA *opna, Sample* buffer, uint32_t nsamples)
{
    if (opna->devmask & 1)
    {
        FMMix(opna, buffer, nsamples);
    }
    if (opna->devmask & 2)
    {
        PSGMix(&opna->psg, buffer, nsamples);
    }
    if (opna->devmask & 4)
    {
        RhythmMix(opna, buffer, nsamples);
    }
}

// ---------------------------------------------------------------------------
//  Table setup/generation routines.
//  FIXME: unify cltab/tltab and then hardcode the result, it's tiny enough
//         that we don't really need to bother with runtime init for it.
//
// Fills tltab, cltab and pmtable. The work is done only on the first call;
// later calls return immediately (guarded by tablemade).
void MakeTable(void) {
    int i, j;
    if (tablemade)
        return;

    tablemade = true;
    // tltab[FM_TLPOS + i] = 4096 * 2^(-16 * i / FM_TLENTS) - 1
    for (i=-FM_TLPOS; i<FM_TLENTS-FM_TLPOS; i++)
    {
        tltab[FM_TLPOS + i] = (uint32_t)(4096.0f * expf((float)M_LN2*(i * -16.0f / FM_TLENTS)))-1;
//      LOG2("tltab[%4d] = 0x%.4x\n", i, tltab[FM_TLPOS+i]);
    }
    // cltab[i] = 255 * 2^(-i / 64)
    for (i=0; i<512; i++)
    {
        int c = (int)(((1 << 8) - 1) * expf((float)M_LN2*(-i / 64.0f)));
#if 1
        // Precision limiting: keep only the top bits below the highest set
        // bit. With the 8-bit scale used above c never exceeds 255, so no
        // bit in the tested range (12..16) is ever set and this is a no-op.
//      c += 1 << 3;
//      c &= ~1 << 3;
        for (j=16; j>11; j--)
        {
            if ((1 << j) & c)
            {
                c &= ((1 << 11) - 1) << (j - 10);
                break;
            }
        }
#endif
        cltab[i] = c;
//      LOG2("cltab[%4d*2] = %6d\n", i, cltab[i*2]);
    }
    //       3       6,      12      30       60       240      420     / 720
    //  1.000963
    //  lfofref[level * max * wave];
    //  pre = lfofref[level][pms * wave >> 8];
    for (i=0; i<8; i++)
    {
        float pmb = pms[i];
        for (j=0; j<FM_LFOENTS; j++)
        {
            // Frequency ratio 2^x for this LFO step, x sweeping linearly
            // from -pmb to +pmb across the table.
            float ratio = expf((float)M_LN2*(pmb * (2*j - FM_LFOENTS+1) / (FM_LFOENTS-1)));
            // Stored as the 16.16 fixed-point offset from unity, i.e.
            // 0x10000 * (2^x - 1). The "- 1" used to sit inside the expf()
            // argument, which scaled every entry by 1/e instead of
            // subtracting unity.
            pmtable[i][j] = (int)(0x10000 * (ratio - 1));
//          LOG4("pmtable[%d][%.2x] = %5d ", i, j, pmtable[i][j]);
//          LOG1(" %7.2f\n", log(1. + pmtable[i][j] / 65536.) / log(2) * 1200);
        }
    }
}