Mercurial > pmdwin
comparison _udivdi3.c @ 0:c55ea9478c80
Hello Gensokyo!
author | Emmanuel Gil Peyrot <linkmauve@linkmauve.fr> |
---|---|
date | Tue, 21 May 2013 10:29:21 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c55ea9478c80 |
---|---|
1 #include <stdint.h> | |
2 #define W_TYPE_SIZE 32 | |
3 struct DWstruct { | |
4 uint32_t low, high; | |
5 }; | |
6 typedef union { | |
7 struct DWstruct s; | |
8 int64_t ll; | |
9 } DWunion; | |
10 | |
11 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ | |
12 do { \ | |
13 uint32_t __x; \ | |
14 __x = (al) + (bl); \ | |
15 (sh) = (ah) + (bh) + (__x < (al)); \ | |
16 (sl) = __x; \ | |
17 } while (0) | |
18 | |
19 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ | |
20 do { \ | |
21 uint32_t __x; \ | |
22 __x = (al) - (bl); \ | |
23 (sh) = (ah) - (bh) - (__x > (al)); \ | |
24 (sl) = __x; \ | |
25 } while (0) | |
26 | |
27 #define umul_ppmm(w1, w0, u, v) \ | |
28 __asm__ ("mul{l} %3" \ | |
29 : "=a" ((uint32_t) (w0)), \ | |
30 "=d" ((uint32_t) (w1)) \ | |
31 : "%0" ((uint32_t) (u)), \ | |
32 "rm" ((uint32_t) (v))) | |
33 | |
34 #define udiv_qrnnd(q, r, n1, n0, dv) \ | |
35 __asm__ ("div{l} %4" \ | |
36 : "=a" ((uint32_t) (q)), \ | |
37 "=d" ((uint32_t) (r)) \ | |
38 : "0" ((uint32_t) (n0)), \ | |
39 "1" ((uint32_t) (n1)), \ | |
40 "rm" ((uint32_t) (dv))) | |
41 | |
42 static inline uint32_t clz_32 (uint32_t x) | |
43 { | |
44 unsigned int ret; | |
45 __asm__ volatile ("or $1, %1 \n\t" | |
46 "bsr %1, %0 \n\t" | |
47 "sub $31, %0 \n\t" | |
48 "neg %0 \n\t" | |
49 :"=a" (ret):"D" (x)); | |
50 return ret; | |
51 } | |
52 | |
53 uint64_t __udivmoddi4 (uint64_t n, uint64_t d, uint64_t * rp) | |
54 { | |
55 const DWunion nn = {.ll = n }; | |
56 const DWunion dd = {.ll = d }; | |
57 DWunion rr; | |
58 uint32_t d0, d1, n0, n1, n2; | |
59 uint32_t q0, q1; | |
60 uint32_t b, bm; | |
61 | |
62 d0 = dd.s.low; | |
63 d1 = dd.s.high; | |
64 n0 = nn.s.low; | |
65 n1 = nn.s.high; | |
66 | |
67 #if !UDIV_NEEDS_NORMALIZATION | |
68 if (d1 == 0) { | |
69 if (d0 > n1) { | |
70 /* 0q = nn / 0D */ | |
71 udiv_qrnnd (q0, n0, n1, n0, d0); | |
72 q1 = 0; | |
73 /* Remainder in n0. */ | |
74 } else { | |
75 /* qq = NN / 0d */ | |
76 if (d0 == 0) | |
77 d0 = 1 / d0; /* Divide intentionally by zero. */ | |
78 udiv_qrnnd (q1, n1, 0, n1, d0); | |
79 udiv_qrnnd (q0, n0, n1, n0, d0); | |
80 /* Remainder in n0. */ | |
81 } | |
82 if (rp != 0) { | |
83 rr.s.low = n0; | |
84 rr.s.high = 0; | |
85 *rp = rr.ll; | |
86 } | |
87 } | |
88 | |
89 #else /* UDIV_NEEDS_NORMALIZATION */ | |
90 | |
91 if (d1 == 0) { | |
92 if (d0 > n1) { | |
93 /* 0q = nn / 0D */ | |
94 bm = clz_32 (d0); | |
95 if (bm != 0) { | |
96 /* Normalize, i.e. make the most significant bit of the | |
97 * denominator set. */ | |
98 d0 = d0 << bm; | |
99 n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm)); | |
100 n0 = n0 << bm; | |
101 } | |
102 udiv_qrnnd (q0, n0, n1, n0, d0); | |
103 q1 = 0; | |
104 /* Remainder in n0 >> bm. */ | |
105 } else { | |
106 /* qq = NN / 0d */ | |
107 if (d0 == 0) | |
108 d0 = 1 / d0; /* Divide intentionally by zero. */ | |
109 bm = clz_32 (d0); | |
110 if (bm == 0) { | |
111 /* From (n1 >= d0) /\ (the most significant bit of d0 is set), | |
112 * conclude (the most significant bit of n1 is set) /\ (the | |
113 * leading quotient digit q1 = 1). | |
114 * | |
115 * This special case is necessary, not an optimization. | |
116 * (Shifts counts of W_TYPE_SIZE are undefined.) */ | |
117 n1 -= d0; | |
118 q1 = 1; | |
119 } else { | |
120 /* Normalize. */ | |
121 b = W_TYPE_SIZE - bm; | |
122 d0 = d0 << bm; | |
123 n2 = n1 >> b; | |
124 n1 = (n1 << bm) | (n0 >> b); | |
125 n0 = n0 << bm; | |
126 udiv_qrnnd (q1, n1, n2, n1, d0); | |
127 } | |
128 /* n1 != d0... */ | |
129 udiv_qrnnd (q0, n0, n1, n0, d0); | |
130 /* Remainder in n0 >> bm. */ | |
131 } | |
132 | |
133 if (rp != 0) { | |
134 rr.s.low = n0 >> bm; | |
135 rr.s.high = 0; | |
136 *rp = rr.ll; | |
137 } | |
138 } | |
139 #endif /* UDIV_NEEDS_NORMALIZATION */ | |
140 else { | |
141 if (d1 > n1) { | |
142 /* 00 = nn / DD */ | |
143 q0 = 0; | |
144 q1 = 0; | |
145 /* Remainder in n1n0. */ | |
146 if (rp != 0) { | |
147 rr.s.low = n0; | |
148 rr.s.high = n1; | |
149 *rp = rr.ll; | |
150 } | |
151 } | |
152 else { | |
153 /* 0q = NN / dd */ | |
154 bm = clz_32 (d1); | |
155 if (bm == 0) { | |
156 /* From (n1 >= d1) /\ (the most significant bit of d1 is set), | |
157 * conclude (the most significant bit of n1 is set) /\ (the | |
158 * quotient digit q0 = 0 or 1). | |
159 * | |
160 * This special case is necessary, not an optimization. */ | |
161 | |
162 /* The condition on the next line takes advantage of that | |
163 * n1 >= d1 (true due to program flow). */ | |
164 if (n1 > d1 || n0 >= d0) { | |
165 q0 = 1; | |
166 sub_ddmmss (n1, n0, n1, n0, d1, d0); | |
167 } else { | |
168 q0 = 0; | |
169 } | |
170 q1 = 0; | |
171 if (rp != 0) { | |
172 rr.s.low = n0; | |
173 rr.s.high = n1; | |
174 *rp = rr.ll; | |
175 } | |
176 } | |
177 else { | |
178 uint32_t m1, m0; | |
179 /* Normalize. */ | |
180 b = W_TYPE_SIZE - bm; | |
181 d1 = (d1 << bm) | (d0 >> b); | |
182 d0 = d0 << bm; | |
183 n2 = n1 >> b; | |
184 n1 = (n1 << bm) | (n0 >> b); | |
185 n0 = n0 << bm; | |
186 udiv_qrnnd (q0, n1, n2, n1, d1); | |
187 umul_ppmm (m1, m0, q0, d0); | |
188 if (m1 > n1 || (m1 == n1 && m0 > n0)) { | |
189 q0--; | |
190 sub_ddmmss (m1, m0, m1, m0, d1, d0); | |
191 } | |
192 q1 = 0; | |
193 /* Remainder in (n1n0 - m1m0) >> bm. */ | |
194 if (rp != 0) { | |
195 sub_ddmmss (n1, n0, n1, n0, m1, m0); | |
196 rr.s.low = (n1 << b) | (n0 >> bm); | |
197 rr.s.high = n1 >> bm; | |
198 *rp = rr.ll; | |
199 } | |
200 } | |
201 } | |
202 } | |
203 | |
204 const DWunion ww = { {.low = q0,.high = q1} }; | |
205 return ww.ll; | |
206 } | |
207 | |
208 uint64_t __umoddi3 (uint64_t u, uint64_t v) | |
209 { | |
210 uint64_t w; | |
211 __udivmoddi4 (u, v, &w); | |
212 return w; | |
213 } | |
214 | |
215 uint64_t __udivdi3 (uint64_t n, uint64_t d) | |
216 { | |
217 return __udivmoddi4 (n, d, (uint64_t *) 0); | |
218 } | |
219 |