Mercurial > pmdwin

#include <stdint.h>
#define W_TYPE_SIZE 32
struct DWstruct {
  uint32_t low, high;
};
typedef union {
  struct DWstruct s;
  int64_t ll;
} DWunion;

#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                  \
    uint32_t __x;                             \
    __x = (al) + (bl);                          \
    (sh) = (ah) + (bh) + (__x < (al));                  \
    (sl) = __x;                             \
  } while (0)

#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                  \
    uint32_t __x;                             \
    __x = (al) - (bl);                          \
    (sh) = (ah) - (bh) - (__x > (al));                  \
    (sl) = __x;                             \
  } while (0)

#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3"                          \
       : "=a" ((uint32_t) (w0)),                 \
         "=d" ((uint32_t) (w1))                  \
       : "%0" ((uint32_t) (u)),                  \
         "rm" ((uint32_t) (v)))

#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4"                          \
       : "=a" ((uint32_t) (q)),                  \
         "=d" ((uint32_t) (r))                   \
       : "0" ((uint32_t) (n0)),                  \
         "1" ((uint32_t) (n1)),                  \
         "rm" ((uint32_t) (dv)))

static inline uint32_t clz_32 (uint32_t x)
{
  unsigned int ret;
  __asm__ volatile ("or $1, %1   \n\t"
                    "bsr %1, %0  \n\t"
                    "sub $31, %0 \n\t"
                    "neg %0      \n\t"
                    :"=a" (ret):"D" (x));
  return ret;
}

uint64_t __udivmoddi4 (uint64_t n, uint64_t d, uint64_t * rp)
{
  const DWunion nn = {.ll = n };
  const DWunion dd = {.ll = d };
  DWunion rr;
  uint32_t d0, d1, n0, n1, n2;
  uint32_t q0, q1;
  uint32_t b, bm;

  d0 = dd.s.low;
  d1 = dd.s.high;
  n0 = nn.s.low;
  n1 = nn.s.high;

#if !UDIV_NEEDS_NORMALIZATION
  if (d1 == 0) {
    if (d0 > n1) {
      /* 0q = nn / 0D */
      udiv_qrnnd (q0, n0, n1, n0, d0);
      q1 = 0;
      /* Remainder in n0.  */
    } else {
      /* qq = NN / 0d */
      if (d0 == 0)
        d0 = 1 / d0;            /* Divide intentionally by zero.  */
      udiv_qrnnd (q1, n1, 0, n1, d0);
      udiv_qrnnd (q0, n0, n1, n0, d0);
      /* Remainder in n0.  */
    }
    if (rp != 0) {
      rr.s.low = n0;
      rr.s.high = 0;
      *rp = rr.ll;
    }
  }

#else /* UDIV_NEEDS_NORMALIZATION */

  if (d1 == 0) {
    if (d0 > n1) {
      /* 0q = nn / 0D */
      bm = clz_32 (d0);
      if (bm != 0) {
        /* Normalize, i.e. make the most significant bit of the
         * denominator set.  */
        d0 = d0 << bm;
        n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
        n0 = n0 << bm;
      }
      udiv_qrnnd (q0, n0, n1, n0, d0);
      q1 = 0;
      /* Remainder in n0 >> bm.  */
    } else {
      /* qq = NN / 0d */
      if (d0 == 0)
        d0 = 1 / d0;            /* Divide intentionally by zero.  */
      bm = clz_32 (d0);
      if (bm == 0) {
        /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
         * conclude (the most significant bit of n1 is set) /\ (the
         * leading quotient digit q1 = 1).
         *
         * This special case is necessary, not an optimization.
         * (Shifts counts of W_TYPE_SIZE are undefined.)  */
        n1 -= d0;
        q1 = 1;
      } else {
        /* Normalize.  */
        b = W_TYPE_SIZE - bm;
        d0 = d0 << bm;
        n2 = n1 >> b;
        n1 = (n1 << bm) | (n0 >> b);
        n0 = n0 << bm;
        udiv_qrnnd (q1, n1, n2, n1, d0);
      }
      /* n1 != d0...  */
      udiv_qrnnd (q0, n0, n1, n0, d0);
      /* Remainder in n0 >> bm.  */
    }

    if (rp != 0) {
      rr.s.low = n0 >> bm;
      rr.s.high = 0;
      *rp = rr.ll;
    }
  }
#endif /* UDIV_NEEDS_NORMALIZATION */
  else {
    if (d1 > n1) {
      /* 00 = nn / DD */
      q0 = 0;
      q1 = 0;
      /* Remainder in n1n0.  */
      if (rp != 0) {
        rr.s.low = n0;
        rr.s.high = n1;
        *rp = rr.ll;
      }
    }
    else {
      /* 0q = NN / dd */
      bm = clz_32 (d1);
      if (bm == 0) {
        /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
         * conclude (the most significant bit of n1 is set) /\ (the
         * quotient digit q0 = 0 or 1).
         *
         * This special case is necessary, not an optimization.  */

        /* The condition on the next line takes advantage of that
         * n1 >= d1 (true due to program flow).  */
        if (n1 > d1 || n0 >= d0) {
          q0 = 1;
          sub_ddmmss (n1, n0, n1, n0, d1, d0);
        } else {
          q0 = 0;
		}
        q1 = 0;
        if (rp != 0) {
          rr.s.low = n0;
          rr.s.high = n1;
          *rp = rr.ll;
        }
      }
      else {
        uint32_t m1, m0;
        /* Normalize.  */
        b = W_TYPE_SIZE - bm;
        d1 = (d1 << bm) | (d0 >> b);
        d0 = d0 << bm;
        n2 = n1 >> b;
        n1 = (n1 << bm) | (n0 >> b);
        n0 = n0 << bm;
        udiv_qrnnd (q0, n1, n2, n1, d1);
        umul_ppmm (m1, m0, q0, d0);
        if (m1 > n1 || (m1 == n1 && m0 > n0)) {
          q0--;
          sub_ddmmss (m1, m0, m1, m0, d1, d0);
        }
        q1 = 0;
        /* Remainder in (n1n0 - m1m0) >> bm.  */
        if (rp != 0) {
          sub_ddmmss (n1, n0, n1, n0, m1, m0);
          rr.s.low = (n1 << b) | (n0 >> bm);
          rr.s.high = n1 >> bm;
          *rp = rr.ll;
        }
      }
    }
  }

  const DWunion ww = { {.low = q0,.high = q1} };
  return ww.ll;
}

uint64_t __umoddi3 (uint64_t u, uint64_t v)
{
  uint64_t w;
  __udivmoddi4 (u, v, &w);
  return w;
}

uint64_t __udivdi3 (uint64_t n, uint64_t d)
{
  return __udivmoddi4 (n, d, (uint64_t *) 0);
}
author	Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date	Tue, 21 May 2013 10:45:37 +0200
parents	c55ea9478c80
children