comparison _udivdi3.c @ 0:c55ea9478c80

Hello Gensokyo!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Tue, 21 May 2013 10:29:21 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c55ea9478c80
1 #include <stdint.h>
2 #define W_TYPE_SIZE 32
3 struct DWstruct {
4 uint32_t low, high;
5 };
6 typedef union {
7 struct DWstruct s;
8 int64_t ll;
9 } DWunion;
10
11 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
12 do { \
13 uint32_t __x; \
14 __x = (al) + (bl); \
15 (sh) = (ah) + (bh) + (__x < (al)); \
16 (sl) = __x; \
17 } while (0)
18
19 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
20 do { \
21 uint32_t __x; \
22 __x = (al) - (bl); \
23 (sh) = (ah) - (bh) - (__x > (al)); \
24 (sl) = __x; \
25 } while (0)
26
27 #define umul_ppmm(w1, w0, u, v) \
28 __asm__ ("mul{l} %3" \
29 : "=a" ((uint32_t) (w0)), \
30 "=d" ((uint32_t) (w1)) \
31 : "%0" ((uint32_t) (u)), \
32 "rm" ((uint32_t) (v)))
33
34 #define udiv_qrnnd(q, r, n1, n0, dv) \
35 __asm__ ("div{l} %4" \
36 : "=a" ((uint32_t) (q)), \
37 "=d" ((uint32_t) (r)) \
38 : "0" ((uint32_t) (n0)), \
39 "1" ((uint32_t) (n1)), \
40 "rm" ((uint32_t) (dv)))
41
42 static inline uint32_t clz_32 (uint32_t x)
43 {
44 unsigned int ret;
45 __asm__ volatile ("or $1, %1 \n\t"
46 "bsr %1, %0 \n\t"
47 "sub $31, %0 \n\t"
48 "neg %0 \n\t"
49 :"=a" (ret):"D" (x));
50 return ret;
51 }
52
53 uint64_t __udivmoddi4 (uint64_t n, uint64_t d, uint64_t * rp)
54 {
55 const DWunion nn = {.ll = n };
56 const DWunion dd = {.ll = d };
57 DWunion rr;
58 uint32_t d0, d1, n0, n1, n2;
59 uint32_t q0, q1;
60 uint32_t b, bm;
61
62 d0 = dd.s.low;
63 d1 = dd.s.high;
64 n0 = nn.s.low;
65 n1 = nn.s.high;
66
67 #if !UDIV_NEEDS_NORMALIZATION
68 if (d1 == 0) {
69 if (d0 > n1) {
70 /* 0q = nn / 0D */
71 udiv_qrnnd (q0, n0, n1, n0, d0);
72 q1 = 0;
73 /* Remainder in n0. */
74 } else {
75 /* qq = NN / 0d */
76 if (d0 == 0)
77 d0 = 1 / d0; /* Divide intentionally by zero. */
78 udiv_qrnnd (q1, n1, 0, n1, d0);
79 udiv_qrnnd (q0, n0, n1, n0, d0);
80 /* Remainder in n0. */
81 }
82 if (rp != 0) {
83 rr.s.low = n0;
84 rr.s.high = 0;
85 *rp = rr.ll;
86 }
87 }
88
89 #else /* UDIV_NEEDS_NORMALIZATION */
90
91 if (d1 == 0) {
92 if (d0 > n1) {
93 /* 0q = nn / 0D */
94 bm = clz_32 (d0);
95 if (bm != 0) {
96 /* Normalize, i.e. make the most significant bit of the
97 * denominator set. */
98 d0 = d0 << bm;
99 n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
100 n0 = n0 << bm;
101 }
102 udiv_qrnnd (q0, n0, n1, n0, d0);
103 q1 = 0;
104 /* Remainder in n0 >> bm. */
105 } else {
106 /* qq = NN / 0d */
107 if (d0 == 0)
108 d0 = 1 / d0; /* Divide intentionally by zero. */
109 bm = clz_32 (d0);
110 if (bm == 0) {
111 /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
112 * conclude (the most significant bit of n1 is set) /\ (the
113 * leading quotient digit q1 = 1).
114 *
115 * This special case is necessary, not an optimization.
116 * (Shifts counts of W_TYPE_SIZE are undefined.) */
117 n1 -= d0;
118 q1 = 1;
119 } else {
120 /* Normalize. */
121 b = W_TYPE_SIZE - bm;
122 d0 = d0 << bm;
123 n2 = n1 >> b;
124 n1 = (n1 << bm) | (n0 >> b);
125 n0 = n0 << bm;
126 udiv_qrnnd (q1, n1, n2, n1, d0);
127 }
128 /* n1 != d0... */
129 udiv_qrnnd (q0, n0, n1, n0, d0);
130 /* Remainder in n0 >> bm. */
131 }
132
133 if (rp != 0) {
134 rr.s.low = n0 >> bm;
135 rr.s.high = 0;
136 *rp = rr.ll;
137 }
138 }
139 #endif /* UDIV_NEEDS_NORMALIZATION */
140 else {
141 if (d1 > n1) {
142 /* 00 = nn / DD */
143 q0 = 0;
144 q1 = 0;
145 /* Remainder in n1n0. */
146 if (rp != 0) {
147 rr.s.low = n0;
148 rr.s.high = n1;
149 *rp = rr.ll;
150 }
151 }
152 else {
153 /* 0q = NN / dd */
154 bm = clz_32 (d1);
155 if (bm == 0) {
156 /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
157 * conclude (the most significant bit of n1 is set) /\ (the
158 * quotient digit q0 = 0 or 1).
159 *
160 * This special case is necessary, not an optimization. */
161
162 /* The condition on the next line takes advantage of that
163 * n1 >= d1 (true due to program flow). */
164 if (n1 > d1 || n0 >= d0) {
165 q0 = 1;
166 sub_ddmmss (n1, n0, n1, n0, d1, d0);
167 } else {
168 q0 = 0;
169 }
170 q1 = 0;
171 if (rp != 0) {
172 rr.s.low = n0;
173 rr.s.high = n1;
174 *rp = rr.ll;
175 }
176 }
177 else {
178 uint32_t m1, m0;
179 /* Normalize. */
180 b = W_TYPE_SIZE - bm;
181 d1 = (d1 << bm) | (d0 >> b);
182 d0 = d0 << bm;
183 n2 = n1 >> b;
184 n1 = (n1 << bm) | (n0 >> b);
185 n0 = n0 << bm;
186 udiv_qrnnd (q0, n1, n2, n1, d1);
187 umul_ppmm (m1, m0, q0, d0);
188 if (m1 > n1 || (m1 == n1 && m0 > n0)) {
189 q0--;
190 sub_ddmmss (m1, m0, m1, m0, d1, d0);
191 }
192 q1 = 0;
193 /* Remainder in (n1n0 - m1m0) >> bm. */
194 if (rp != 0) {
195 sub_ddmmss (n1, n0, n1, n0, m1, m0);
196 rr.s.low = (n1 << b) | (n0 >> bm);
197 rr.s.high = n1 >> bm;
198 *rp = rr.ll;
199 }
200 }
201 }
202 }
203
204 const DWunion ww = { {.low = q0,.high = q1} };
205 return ww.ll;
206 }
207
208 uint64_t __umoddi3 (uint64_t u, uint64_t v)
209 {
210 uint64_t w;
211 __udivmoddi4 (u, v, &w);
212 return w;
213 }
214
215 uint64_t __udivdi3 (uint64_t n, uint64_t d)
216 {
217 return __udivmoddi4 (n, d, (uint64_t *) 0);
218 }
219