Mercurial > pmdwin
diff _udivdi3.c @ 0:c55ea9478c80
Hello Gensokyo!
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Tue, 21 May 2013 10:29:21 +0200 |
parents | |
children |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/_udivdi3.c @@ -0,0 +1,219 @@ +#include <stdint.h> +#define W_TYPE_SIZE 32 +struct DWstruct { + uint32_t low, high; +}; +typedef union { + struct DWstruct s; + int64_t ll; +} DWunion; + +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + uint32_t __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) + +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + uint32_t __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) + +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mul{l} %3" \ + : "=a" ((uint32_t) (w0)), \ + "=d" ((uint32_t) (w1)) \ + : "%0" ((uint32_t) (u)), \ + "rm" ((uint32_t) (v))) + +#define udiv_qrnnd(q, r, n1, n0, dv) \ + __asm__ ("div{l} %4" \ + : "=a" ((uint32_t) (q)), \ + "=d" ((uint32_t) (r)) \ + : "0" ((uint32_t) (n0)), \ + "1" ((uint32_t) (n1)), \ + "rm" ((uint32_t) (dv))) + +static inline uint32_t clz_32 (uint32_t x) +{ + unsigned int ret; + __asm__ volatile ("or $1, %1 \n\t" + "bsr %1, %0 \n\t" + "sub $31, %0 \n\t" + "neg %0 \n\t" + :"=a" (ret):"D" (x)); + return ret; +} + +uint64_t __udivmoddi4 (uint64_t n, uint64_t d, uint64_t * rp) +{ + const DWunion nn = {.ll = n }; + const DWunion dd = {.ll = d }; + DWunion rr; + uint32_t d0, d1, n0, n1, n2; + uint32_t q0, q1; + uint32_t b, bm; + + d0 = dd.s.low; + d1 = dd.s.high; + n0 = nn.s.low; + n1 = nn.s.high; + +#if !UDIV_NEEDS_NORMALIZATION + if (d1 == 0) { + if (d0 > n1) { + /* 0q = nn / 0D */ + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + /* Remainder in n0. */ + } else { + /* qq = NN / 0d */ + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + udiv_qrnnd (q1, n1, 0, n1, d0); + udiv_qrnnd (q0, n0, n1, n0, d0); + /* Remainder in n0. */ + } + if (rp != 0) { + rr.s.low = n0; + rr.s.high = 0; + *rp = rr.ll; + } + } + +#else /* UDIV_NEEDS_NORMALIZATION */ + + if (d1 == 0) { + if (d0 > n1) { + /* 0q = nn / 0D */ + bm = clz_32 (d0); + if (bm != 0) { + /* Normalize, i.e. make the most significant bit of the + * denominator set. */ + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + /* Remainder in n0 >> bm. */ + } else { + /* qq = NN / 0d */ + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + bm = clz_32 (d0); + if (bm == 0) { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + * conclude (the most significant bit of n1 is set) /\ (the + * leading quotient digit q1 = 1). + * + * This special case is necessary, not an optimization. + * (Shifts counts of W_TYPE_SIZE are undefined.) */ + n1 -= d0; + q1 = 1; + } else { + /* Normalize. */ + b = W_TYPE_SIZE - bm; + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + udiv_qrnnd (q1, n1, n2, n1, d0); + } + /* n1 != d0... */ + udiv_qrnnd (q0, n0, n1, n0, d0); + /* Remainder in n0 >> bm. */ + } + + if (rp != 0) { + rr.s.low = n0 >> bm; + rr.s.high = 0; + *rp = rr.ll; + } + } +#endif /* UDIV_NEEDS_NORMALIZATION */ + else { + if (d1 > n1) { + /* 00 = nn / DD */ + q0 = 0; + q1 = 0; + /* Remainder in n1n0. */ + if (rp != 0) { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else { + /* 0q = NN / dd */ + bm = clz_32 (d1); + if (bm == 0) { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + * conclude (the most significant bit of n1 is set) /\ (the + * quotient digit q0 = 0 or 1). + * + * This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + * n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } else { + q0 = 0; + } + q1 = 0; + if (rp != 0) { + rr.s.low = n0; + rr.s.high = n1; + *rp = rr.ll; + } + } + else { + uint32_t m1, m0; + /* Normalize. */ + b = W_TYPE_SIZE - bm; + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + if (m1 > n1 || (m1 == n1 && m0 > n0)) { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + q1 = 0; + /* Remainder in (n1n0 - m1m0) >> bm. */ + if (rp != 0) { + sub_ddmmss (n1, n0, n1, n0, m1, m0); + rr.s.low = (n1 << b) | (n0 >> bm); + rr.s.high = n1 >> bm; + *rp = rr.ll; + } + } + } + } + + const DWunion ww = { {.low = q0,.high = q1} }; + return ww.ll; +} + +uint64_t __umoddi3 (uint64_t u, uint64_t v) +{ + uint64_t w; + __udivmoddi4 (u, v, &w); + return w; +} + +uint64_t __udivdi3 (uint64_t n, uint64_t d) +{ + return __udivmoddi4 (n, d, (uint64_t *) 0); +} +