diff _udivdi3.c @ 0:c55ea9478c80

Hello Gensokyo!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Tue, 21 May 2013 10:29:21 +0200
parents
children
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/_udivdi3.c
@@ -0,0 +1,219 @@
+#include <stdint.h>
+#define W_TYPE_SIZE 32
+struct DWstruct {
+  uint32_t low, high;
+};
+typedef union {
+  struct DWstruct s;
+  int64_t ll;
+} DWunion;
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {                                  \
+    uint32_t __x;                             \
+    __x = (al) + (bl);                          \
+    (sh) = (ah) + (bh) + (__x < (al));                  \
+    (sl) = __x;                             \
+  } while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                  \
+    uint32_t __x;                             \
+    __x = (al) - (bl);                          \
+    (sh) = (ah) - (bh) - (__x > (al));                  \
+    (sl) = __x;                             \
+  } while (0)
+
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mul{l} %3"                          \
+       : "=a" ((uint32_t) (w0)),                 \
+         "=d" ((uint32_t) (w1))                  \
+       : "%0" ((uint32_t) (u)),                  \
+         "rm" ((uint32_t) (v)))
+
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("div{l} %4"                          \
+       : "=a" ((uint32_t) (q)),                  \
+         "=d" ((uint32_t) (r))                   \
+       : "0" ((uint32_t) (n0)),                  \
+         "1" ((uint32_t) (n1)),                  \
+         "rm" ((uint32_t) (dv)))
+
+static inline uint32_t clz_32 (uint32_t x)
+{
+  unsigned int ret;
+  __asm__ volatile ("or $1, %1   \n\t"
+                    "bsr %1, %0  \n\t"
+                    "sub $31, %0 \n\t"
+                    "neg %0      \n\t"
+                    :"=a" (ret):"D" (x));
+  return ret;
+}
+
+uint64_t __udivmoddi4 (uint64_t n, uint64_t d, uint64_t * rp)
+{
+  const DWunion nn = {.ll = n };
+  const DWunion dd = {.ll = d };
+  DWunion rr;
+  uint32_t d0, d1, n0, n1, n2;
+  uint32_t q0, q1;
+  uint32_t b, bm;
+
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+  if (d1 == 0) {
+    if (d0 > n1) {
+      /* 0q = nn / 0D */
+      udiv_qrnnd (q0, n0, n1, n0, d0);
+      q1 = 0;
+      /* Remainder in n0.  */
+    } else {
+      /* qq = NN / 0d */
+      if (d0 == 0)
+        d0 = 1 / d0;            /* Divide intentionally by zero.  */
+      udiv_qrnnd (q1, n1, 0, n1, d0);
+      udiv_qrnnd (q0, n0, n1, n0, d0);
+      /* Remainder in n0.  */
+    }
+    if (rp != 0) {
+      rr.s.low = n0;
+      rr.s.high = 0;
+      *rp = rr.ll;
+    }
+  }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+  if (d1 == 0) {
+    if (d0 > n1) {
+      /* 0q = nn / 0D */
+      bm = clz_32 (d0);
+      if (bm != 0) {
+        /* Normalize, i.e. make the most significant bit of the
+         * denominator set.  */
+        d0 = d0 << bm;
+        n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+        n0 = n0 << bm;
+      }
+      udiv_qrnnd (q0, n0, n1, n0, d0);
+      q1 = 0;
+      /* Remainder in n0 >> bm.  */
+    } else {
+      /* qq = NN / 0d */
+      if (d0 == 0)
+        d0 = 1 / d0;            /* Divide intentionally by zero.  */
+      bm = clz_32 (d0);
+      if (bm == 0) {
+        /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+         * conclude (the most significant bit of n1 is set) /\ (the
+         * leading quotient digit q1 = 1).
+         * 
+         * This special case is necessary, not an optimization.
+         * (Shifts counts of W_TYPE_SIZE are undefined.)  */
+        n1 -= d0;
+        q1 = 1;
+      } else {
+        /* Normalize.  */
+        b = W_TYPE_SIZE - bm;
+        d0 = d0 << bm;
+        n2 = n1 >> b;
+        n1 = (n1 << bm) | (n0 >> b);
+        n0 = n0 << bm;
+        udiv_qrnnd (q1, n1, n2, n1, d0);
+      }
+      /* n1 != d0...  */
+      udiv_qrnnd (q0, n0, n1, n0, d0);
+      /* Remainder in n0 >> bm.  */
+    }
+
+    if (rp != 0) {
+      rr.s.low = n0 >> bm;
+      rr.s.high = 0;
+      *rp = rr.ll;
+    }
+  }
+#endif /* UDIV_NEEDS_NORMALIZATION */
+  else {
+    if (d1 > n1) {
+      /* 00 = nn / DD */
+      q0 = 0;
+      q1 = 0;
+      /* Remainder in n1n0.  */
+      if (rp != 0) {
+        rr.s.low = n0;
+        rr.s.high = n1;
+        *rp = rr.ll;
+      }
+    }
+    else {
+      /* 0q = NN / dd */
+      bm = clz_32 (d1);
+      if (bm == 0) {
+        /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+         * conclude (the most significant bit of n1 is set) /\ (the
+         * quotient digit q0 = 0 or 1).
+         * 
+         * This special case is necessary, not an optimization.  */
+
+        /* The condition on the next line takes advantage of that
+         * n1 >= d1 (true due to program flow).  */
+        if (n1 > d1 || n0 >= d0) {
+          q0 = 1;
+          sub_ddmmss (n1, n0, n1, n0, d1, d0);
+        } else {
+          q0 = 0;
+		}
+        q1 = 0;
+        if (rp != 0) {
+          rr.s.low = n0;
+          rr.s.high = n1;
+          *rp = rr.ll;
+        }
+      }
+      else {
+        uint32_t m1, m0;
+        /* Normalize.  */
+        b = W_TYPE_SIZE - bm;
+        d1 = (d1 << bm) | (d0 >> b);
+        d0 = d0 << bm;
+        n2 = n1 >> b;
+        n1 = (n1 << bm) | (n0 >> b);
+        n0 = n0 << bm;
+        udiv_qrnnd (q0, n1, n2, n1, d1);
+        umul_ppmm (m1, m0, q0, d0);
+        if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+          q0--;
+          sub_ddmmss (m1, m0, m1, m0, d1, d0);
+        }
+        q1 = 0;
+        /* Remainder in (n1n0 - m1m0) >> bm.  */
+        if (rp != 0) {
+          sub_ddmmss (n1, n0, n1, n0, m1, m0);
+          rr.s.low = (n1 << b) | (n0 >> bm);
+          rr.s.high = n1 >> bm;
+          *rp = rr.ll;
+        }
+      }
+    }
+  }
+
+  const DWunion ww = { {.low = q0,.high = q1} };
+  return ww.ll;
+}
+
+uint64_t __umoddi3 (uint64_t u, uint64_t v)
+{
+  uint64_t w;
+  __udivmoddi4 (u, v, &w);
+  return w;
+}
+
+uint64_t __udivdi3 (uint64_t n, uint64_t d)
+{
+  return __udivmoddi4 (n, d, (uint64_t *) 0);
+}
+