File Coverage

d2fixed.c

Criterion	Covered	Total	%
statement	2	423	0.4
branch	0	204	0.0
condition			n/a
subroutine			n/a
pod			n/a
total	2	627	0.3

line	stmt	bran	code
1			// Copyright 2018 Ulf Adams
2			//
3			// The contents of this file may be used under the terms of the Apache License,
4			// Version 2.0.
5			//
6			// (See accompanying file LICENSE-Apache or copy at
7			// http://www.apache.org/licenses/LICENSE-2.0)
8			//
9			// Alternatively, the contents of this file may be used under the terms of
10			// the Boost Software License, Version 1.0.
11			// (See accompanying file LICENSE-Boost or copy at
12			// https://www.boost.org/LICENSE_1_0.txt)
13			//
14			// Unless required by applicable law or agreed to in writing, this software
15			// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16			// KIND, either express or implied.
17
18			// Runtime compiler options:
19			// -DRYU_DEBUG Generate verbose debugging output to stdout.
20			//
21			// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,
22			// depending on your compiler.
23			//
24			// -DRYU_AVOID_UINT128 Avoid using uint128_t. Slower, depending on your compiler.
25
26			/* Sisyphus has applied some superficial changes to this file because perl has *
27			* not always honored "C99 mode". The location of the header files, relative *
28			* to the location of this file, has also changed */
29
30			#include "ryu_headers/ryu.h"
31
32			#include <assert.h>
33			#include <stdbool.h>
34			#include <stdint.h>
35			#include <stdlib.h>
36			#include <string.h>
37
38			#ifdef RYU_DEBUG
39			#include <inttypes.h>
40			#include <stdio.h>
41			#endif
42
43			#include "ryu_headers/common.h"
44			#include "ryu_headers/digit_table.h"
45			#include "ryu_headers/d2fixed_full_table.h"
46			#include "ryu_headers/d2s_intrinsics.h"
47
48			#define DOUBLE_MANTISSA_BITS 52
49			#define DOUBLE_EXPONENT_BITS 11
50			#define DOUBLE_BIAS 1023
51
52			#define POW10_ADDITIONAL_BITS 120
53
54			#if defined(HAS_UINT128)
55	0		static inline uint128_t umul256(const uint128_t a, const uint64_t bHi, const uint64_t bLo, uint128_t* const productHi) {
56	0		const uint64_t aLo = (uint64_t)a;
57	0		const uint64_t aHi = (uint64_t)(a >> 64);
58
59	0		const uint128_t b00 = (uint128_t)aLo * bLo;
60	0		const uint128_t b01 = (uint128_t)aLo * bHi;
61	0		const uint128_t b10 = (uint128_t)aHi * bLo;
62	0		const uint128_t b11 = (uint128_t)aHi * bHi;
63
64	0		const uint64_t b00Lo = (uint64_t)b00;
65	0		const uint64_t b00Hi = (uint64_t)(b00 >> 64);
66
67	0		const uint128_t mid1 = b10 + b00Hi;
68	0		const uint64_t mid1Lo = (uint64_t)(mid1);
69	0		const uint64_t mid1Hi = (uint64_t)(mid1 >> 64);
70
71	0		const uint128_t mid2 = b01 + mid1Lo;
72	0		const uint64_t mid2Lo = (uint64_t)(mid2);
73	0		const uint64_t mid2Hi = (uint64_t)(mid2 >> 64);
74
75	0		const uint128_t pHi = b11 + mid1Hi + mid2Hi;
76	0		const uint128_t pLo = ((uint128_t)mid2Lo << 64) | b00Lo;
77
78	0		*productHi = pHi;
79	0		return pLo;
80			}
81
82			// Returns the high 128 bits of the 256-bit product of a and b.
83	0		static inline uint128_t umul256_hi(const uint128_t a, const uint64_t bHi, const uint64_t bLo) {
84			// Reuse the umul256 implementation.
85			// Optimizers will likely eliminate the instructions used to compute the
86			// low part of the product.
87			uint128_t hi;
88	0		umul256(a, bHi, bLo, &hi);
89	0		return hi;
90			}
91
92			// Unfortunately, gcc/clang do not automatically turn a 128-bit integer division
93			// into a multiplication, so we have to do it manually.
94	0		static inline uint32_t uint128_mod1e9(const uint128_t v) {
95			// After multiplying, we're going to shift right by 29, then truncate to uint32_t.
96			// This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
97	0		const uint64_t multiplied = (uint64_t) umul256_hi(v, 0x89705F4136B4A597u, 0x31680A88F8953031u);
98
99			// For uint32_t truncation, see the mod1e9() comment in d2s_intrinsics.h.
100	0		const uint32_t shifted = (uint32_t) (multiplied >> 29);
101
102	0		return ((uint32_t) v) - 1000000000 * shifted;
103			}
104
105			// Best case: use 128-bit type.
106	0		static inline uint32_t mulShift_mod1e9(const uint64_t m, const uint64_t* const mul, const int32_t j) {
107	0		const uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
108	0		const uint128_t b1 = ((uint128_t) m) * mul[1]; // 64
109	0		const uint128_t b2 = ((uint128_t) m) * mul[2]; // 128
110			#ifdef RYU_DEBUG
111	0	0	if (j < 128 || j > 180) {
		0
112	0		printf("%d\n", j);
113			}
114			#endif
115	0	0	assert(j >= 128);
116	0	0	assert(j <= 180);
117			// j: [128, 256)
118	0		const uint128_t mid = b1 + (uint64_t) (b0 >> 64); // 64
119	0		const uint128_t s1 = b2 + (uint64_t) (mid >> 64); // 128
120	0		return uint128_mod1e9(s1 >> (j - 128));
121			}
122
123			#else // HAS_UINT128
124
125			#if defined(HAS_64_BIT_INTRINSICS)
126			// Returns the low 64 bits of the high 128 bits of the 256-bit product of a and b.
127			static inline uint64_t umul256_hi128_lo64(
128			const uint64_t aHi, const uint64_t aLo, const uint64_t bHi, const uint64_t bLo) {
129			uint64_t b00Hi;
130			const uint64_t b00Lo = umul128(aLo, bLo, &b00Hi);
131			uint64_t b01Hi;
132			const uint64_t b01Lo = umul128(aLo, bHi, &b01Hi);
133			uint64_t b10Hi;
134			const uint64_t b10Lo = umul128(aHi, bLo, &b10Hi);
135			uint64_t b11Hi;
136			const uint64_t b11Lo = umul128(aHi, bHi, &b11Hi);
137			(void) b00Lo; // unused
138			(void) b11Hi; // unused
139			const uint64_t temp1Lo = b10Lo + b00Hi;
140			const uint64_t temp1Hi = b10Hi + (temp1Lo < b10Lo);
141			const uint64_t temp2Lo = b01Lo + temp1Lo;
142			const uint64_t temp2Hi = b01Hi + (temp2Lo < b01Lo);
143			return b11Lo + temp1Hi + temp2Hi;
144			}
145
146			static inline uint32_t uint128_mod1e9(const uint64_t vHi, const uint64_t vLo) {
147			// After multiplying, we're going to shift right by 29, then truncate to uint32_t.
148			// This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
149			const uint64_t multiplied = umul256_hi128_lo64(vHi, vLo, 0x89705F4136B4A597u, 0x31680A88F8953031u);
150
151			// For uint32_t truncation, see the mod1e9() comment in d2s_intrinsics.h.
152			const uint32_t shifted = (uint32_t) (multiplied >> 29);
153
154			return ((uint32_t) vLo) - 1000000000 * shifted;
155			}
156			#endif // HAS_64_BIT_INTRINSICS
157
158			static inline uint32_t mulShift_mod1e9(const uint64_t m, const uint64_t* const mul, const int32_t j) {
159			uint64_t high0; // 64
160			const uint64_t low0 = umul128(m, mul[0], &high0); // 0
161			uint64_t high1; // 128
162			const uint64_t low1 = umul128(m, mul[1], &high1); // 64
163			uint64_t high2; // 192
164			const uint64_t low2 = umul128(m, mul[2], &high2); // 128
165			const uint64_t s0low = low0; // 0
166			(void) s0low; // unused
167			const uint64_t s0high = low1 + high0; // 64
168			const uint32_t c1 = s0high < low1;
169			const uint64_t s1low = low2 + high1 + c1; // 128
170			const uint32_t c2 = s1low < low2; // high1 + c1 can't overflow, so compare against low2
171			const uint64_t s1high = high2 + c2; // 192
172			#ifdef RYU_DEBUG
173			if (j < 128 || j > 180) {
174			printf("%d\n", j);
175			}
176			#endif
177			assert(j >= 128);
178			assert(j <= 180);
179			#if defined(HAS_64_BIT_INTRINSICS)
180			const uint32_t dist = (uint32_t) (j - 128); // dist: [0, 52]
181			const uint64_t shiftedhigh = s1high >> dist;
182			const uint64_t shiftedlow = shiftright128(s1low, s1high, dist);
183			return uint128_mod1e9(shiftedhigh, shiftedlow);
184			#else // HAS_64_BIT_INTRINSICS
185			if (j < 160) { // j: [128, 160)
186			const uint64_t r0 = mod1e9(s1high);
187			const uint64_t r1 = mod1e9((r0 << 32) | (s1low >> 32));
188			const uint64_t r2 = ((r1 << 32) | (s1low & 0xffffffff));
189			return mod1e9(r2 >> (j - 128));
190			} else { // j: [160, 192)
191			const uint64_t r0 = mod1e9(s1high);
192			const uint64_t r1 = ((r0 << 32) | (s1low >> 32));
193			return mod1e9(r1 >> (j - 160));
194			}
195			#endif // HAS_64_BIT_INTRINSICS
196			}
197			#endif // HAS_UINT128
198
199			// Convert `digits` to a sequence of decimal digits. Append the digits to the result.
200			// The caller has to guarantee that:
201			// 10^(olength-1) <= digits < 10^olength
202			// e.g., by passing `olength` as `decimalLength9(digits)`.
203	0		static inline void append_n_digits(const uint32_t olength, uint32_t digits, char* const result) {
204			#ifdef RYU_DEBUG
205	0		printf("DIGITS=%u\n", digits);
206			#endif
207
208	0		uint32_t i = 0;
209	0	0	while (digits >= 10000) {
210			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
211			const uint32_t c = digits - 10000 * (digits / 10000);
212			#else
213	0		const uint32_t c = digits % 10000;
214			#endif
215	0		digits /= 10000;
216	0		const uint32_t c0 = (c % 100) << 1;
217	0		const uint32_t c1 = (c / 100) << 1;
218	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c0, 2);
219	0		memcpy(result + olength - i - 4, DIGIT_TABLE + c1, 2);
220	0		i += 4;
221			}
222	0	0	if (digits >= 100) {
223	0		const uint32_t c = (digits % 100) << 1;
224	0		digits /= 100;
225	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c, 2);
226	0		i += 2;
227			}
228	0	0	if (digits >= 10) {
229	0		const uint32_t c = digits << 1;
230	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c, 2);
231			} else {
232	0		result[0] = (char) ('0' + digits);
233			}
234	0		}
235
236			// Convert `digits` to a sequence of decimal digits. Print the first digit, followed by a decimal
237			// dot '.' followed by the remaining digits. The caller has to guarantee that:
238			// 10^(olength-1) <= digits < 10^olength
239			// e.g., by passing `olength` as `decimalLength9(digits)`.
240	0		static inline void append_d_digits(const uint32_t olength, uint32_t digits, char* const result) {
241			#ifdef RYU_DEBUG
242	0		printf("DIGITS=%u\n", digits);
243			#endif
244
245	0		uint32_t i = 0;
246	0	0	while (digits >= 10000) {
247			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
248			const uint32_t c = digits - 10000 * (digits / 10000);
249			#else
250	0		const uint32_t c = digits % 10000;
251			#endif
252	0		digits /= 10000;
253	0		const uint32_t c0 = (c % 100) << 1;
254	0		const uint32_t c1 = (c / 100) << 1;
255	0		memcpy(result + olength + 1 - i - 2, DIGIT_TABLE + c0, 2);
256	0		memcpy(result + olength + 1 - i - 4, DIGIT_TABLE + c1, 2);
257	0		i += 4;
258			}
259	0	0	if (digits >= 100) {
260	0		const uint32_t c = (digits % 100) << 1;
261	0		digits /= 100;
262	0		memcpy(result + olength + 1 - i - 2, DIGIT_TABLE + c, 2);
263	0		i += 2;
264			}
265	0	0	if (digits >= 10) {
266	0		const uint32_t c = digits << 1;
267	0		result[2] = DIGIT_TABLE[c + 1];
268	0		result[1] = '.';
269	0		result[0] = DIGIT_TABLE[c];
270			} else {
271	0		result[1] = '.';
272	0		result[0] = (char) ('0' + digits);
273			}
274	0		}
275
276			// Convert `digits` to decimal and write the last `count` decimal digits to result.
277			// If `digits` contains additional digits, then those are silently ignored.
278	0		static inline void append_c_digits(const uint32_t count, uint32_t digits, char* const result) {
279			#ifdef RYU_DEBUG
280	0		printf("DIGITS=%u\n", digits);
281			#endif
282			// Copy pairs of digits from DIGIT_TABLE.
283	0		uint32_t i = 0;
284	0	0	for (; i < count - 1; i += 2) {
285	0		const uint32_t c = (digits % 100) << 1;
286	0		digits /= 100;
287	0		memcpy(result + count - i - 2, DIGIT_TABLE + c, 2);
288			}
289			// Generate the last digit if count is odd.
290	0	0	if (i < count) {
291	0		const char c = (char) ('0' + (digits % 10));
292	0		result[count - i - 1] = c;
293			}
294	0		}
295
296			// Convert `digits` to decimal and write the last 9 decimal digits to result.
297			// If `digits` contains additional digits, then those are silently ignored.
298	0		static inline void append_nine_digits(uint32_t digits, char* const result) {
299			uint32_t i;
300			#ifdef RYU_DEBUG
301	0		printf("DIGITS=%u\n", digits);
302			#endif
303	0	0	if (digits == 0) {
304	0		memset(result, '0', 9);
305	0		return;
306			}
307
308	0	0	for (i = 0; i < 5; i += 4) {
309			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
310			const uint32_t c = digits - 10000 * (digits / 10000);
311			#else
312	0		const uint32_t c = digits % 10000;
313			#endif
314	0		digits /= 10000;
315	0		const uint32_t c0 = (c % 100) << 1;
316	0		const uint32_t c1 = (c / 100) << 1;
317	0		memcpy(result + 7 - i, DIGIT_TABLE + c0, 2);
318	0		memcpy(result + 5 - i, DIGIT_TABLE + c1, 2);
319			}
320	0		result[0] = (char) ('0' + digits);
321			}
322
323	0		static inline uint32_t indexForExponent(const uint32_t e) {
324	0		return (e + 15) / 16;
325			}
326
327	0		static inline uint32_t pow10BitsForIndex(const uint32_t idx) {
328	0		return 16 * idx + POW10_ADDITIONAL_BITS;
329			}
330
331	0		static inline uint32_t lengthForIndex(const uint32_t idx) {
332			// +1 for ceil, +16 for mantissa, +8 to round up when dividing by 9
333	0		return (log10Pow2(16 * (int32_t) idx) + 1 + 16 + 8) / 9;
334			}
335
336	0		static inline int copy_special_str_printf(char* const result, const bool sign, const uint64_t mantissa) {
337			#if defined(_MSC_VER)
338			// TODO: Check that -nan is expected output on Windows.
339			if (sign) {
340			result[0] = '-';
341			}
342			if (mantissa) {
343			if (mantissa < (1ull << (DOUBLE_MANTISSA_BITS - 1))) {
344			memcpy(result + sign, "nan(snan)", 9);
345			return sign + 9;
346			}
347			memcpy(result + sign, "nan", 3);
348			return sign + 3;
349			}
350			#else
351	0	0	if (mantissa) {
352	0		memcpy(result, "nan", 3);
353	0		return 3;
354			}
355	0	0	if (sign) {
356	0		result[0] = '-';
357			}
358			#endif
359	0		memcpy(result + sign, "Infinity", 8);
360	0		return sign + 8;
361			}
362
363	0		int d2fixed_buffered_n(double d, uint32_t precision, char* result) {
364	0		const uint64_t bits = double_to_bits(d);
365			#ifdef RYU_DEBUG
366			int32_t bit;
367	0		printf("IN=");
368	0	0	for (bit = 63; bit >= 0; --bit) {
369	0		printf("%d", (int) ((bits >> bit) & 1));
370			}
371	0		printf("\n");
372			#endif
373
374			// Decode bits into sign, mantissa, and exponent.
375	0		const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
376	0		const uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
377	0		const uint32_t ieeeExponent = (uint32_t) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
378
379			// Case distinction; exit early for the easy cases.
380	0	0	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
381	0		return copy_special_str_printf(result, ieeeSign, ieeeMantissa);
382			}
383	0	0	if (ieeeExponent == 0 && ieeeMantissa == 0) {
		0
384	0		int index = 0;
385	0	0	if (ieeeSign) {
386	0		result[index++] = '-';
387			}
388	0		result[index++] = '0';
389	0	0	if (precision > 0) {
390	0		result[index++] = '.';
391	0		memset(result + index, '0', precision);
392	0		index += precision;
393			}
394	0		return index;
395			}
396
397			int32_t e2;
398			int32_t i;
399			uint64_t m2;
400	0	0	if (ieeeExponent == 0) {
401	0		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
402	0		m2 = ieeeMantissa;
403			} else {
404	0		e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
405	0		m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
406			}
407
408			#ifdef RYU_DEBUG
409	0		printf("-> %" PRIu64 " * 2^%d\n", m2, e2);
410			#endif
411
412	0		int index = 0;
413	0		bool nonzero = false;
414	0	0	if (ieeeSign) {
415	0		result[index++] = '-';
416			}
417	0	0	if (e2 >= -52) {
418	0	0	const uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t) e2);
419	0		const uint32_t p10bits = pow10BitsForIndex(idx);
420	0		const int32_t len = (int32_t) lengthForIndex(idx);
421			#ifdef RYU_DEBUG
422	0		printf("idx=%u\n", idx);
423	0		printf("len=%d\n", len);
424			#endif
425	0	0	for (i = len - 1; i >= 0; --i) {
426	0		const uint32_t j = p10bits - e2;
427			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
428			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
429	0		const uint32_t digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT[POW10_OFFSET[idx] + i], (int32_t) (j + 8));
430	0	0	if (nonzero) {
431	0		append_nine_digits(digits, result + index);
432	0		index += 9;
433	0	0	} else if (digits != 0) {
434	0		const uint32_t olength = decimalLength9(digits);
435	0		append_n_digits(olength, digits, result + index);
436	0		index += olength;
437	0		nonzero = true;
438			}
439			}
440			}
441	0	0	if (!nonzero) {
442	0		result[index++] = '0';
443			}
444	0	0	if (precision > 0) {
445	0		result[index++] = '.';
446			}
447			#ifdef RYU_DEBUG
448	0		printf("e2=%d\n", e2);
449			#endif
450	0	0	if (e2 < 0) {
451	0		const int32_t idx = -e2 / 16;
452			#ifdef RYU_DEBUG
453	0		printf("idx=%d\n", idx);
454			#endif
455	0		const uint32_t blocks = precision / 9 + 1;
456			// 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
457	0		int roundUp = 0;
458	0		uint32_t i = 0;
459	0	0	if (blocks <= MIN_BLOCK_2[idx]) {
460	0		i = blocks;
461	0		memset(result + index, '0', precision);
462	0		index += precision;
463	0	0	} else if (i < MIN_BLOCK_2[idx]) {
464	0		i = MIN_BLOCK_2[idx];
465	0		memset(result + index, '0', 9 * i);
466	0		index += 9 * i;
467			}
468	0	0	for (; i < blocks; ++i) {
469	0		const int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx);
470	0		const uint32_t p = POW10_OFFSET_2[idx] + i - MIN_BLOCK_2[idx];
471	0	0	if (p >= POW10_OFFSET_2[idx + 1]) {
472			// If the remaining digits are all 0, then we might as well use memset.
473			// No rounding required in this case.
474	0		const uint32_t fill = precision - 9 * i;
475	0		memset(result + index, '0', fill);
476	0		index += fill;
477	0		break;
478			}
479			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
480			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
481	0		uint32_t digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT_2[p], j + 8);
482			uint32_t k;
483			#ifdef RYU_DEBUG
484	0		printf("digits=%u\n", digits);
485			#endif
486	0	0	if (i < blocks - 1) {
487	0		append_nine_digits(digits, result + index);
488	0		index += 9;
489			} else {
490	0		const uint32_t maximum = precision - 9 * i;
491	0		uint32_t lastDigit = 0;
492	0	0	for (k = 0; k < 9 - maximum; ++k) {
493	0		lastDigit = digits % 10;
494	0		digits /= 10;
495			}
496			#ifdef RYU_DEBUG
497	0		printf("lastDigit=%u\n", lastDigit);
498			#endif
499	0	0	if (lastDigit != 5) {
500	0		roundUp = lastDigit > 5;
501			} else {
502			// Is m * 10^(additionalDigits + 1) / 2^(-e2) integer?
503	0		const int32_t requiredTwos = -e2 - (int32_t) precision - 1;
504	0		const bool trailingZeros = requiredTwos <= 0
505	0	0	|| (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t) requiredTwos));
		0
		0
506	0	0	roundUp = trailingZeros ? 2 : 1;
507			#ifdef RYU_DEBUG
508	0		printf("requiredTwos=%d\n", requiredTwos);
509	0	0	printf("trailingZeros=%s\n", trailingZeros ? "true" : "false");
510			#endif
511			}
512	0	0	if (maximum > 0) {
513	0		append_c_digits(maximum, digits, result + index);
514	0		index += maximum;
515			}
516	0		break;
517			}
518			}
519			#ifdef RYU_DEBUG
520	0		printf("roundUp=%d\n", roundUp);
521			#endif
522	0	0	if (roundUp != 0) {
523	0		int roundIndex = index;
524	0		int dotIndex = 0; // '.' can't be located at index 0
525			while (true) {
526	0		--roundIndex;
527			char c;
528	0	0	if (roundIndex == -1 || (c = result[roundIndex], c == '-')) {
		0
529	0		result[roundIndex + 1] = '1';
530	0	0	if (dotIndex > 0) {
531	0		result[dotIndex] = '0';
532	0		result[dotIndex + 1] = '.';
533			}
534	0		result[index++] = '0';
535	0		break;
536			}
537	0	0	if (c == '.') {
538	0		dotIndex = roundIndex;
539	0		continue;
540	0	0	} else if (c == '9') {
541	0		result[roundIndex] = '0';
542	0		roundUp = 1;
543	0		continue;
544			} else {
545	0	0	if (roundUp == 2 && c % 2 == 0) {
		0
546	0		break;
547			}
548	0		result[roundIndex] = c + 1;
549	0		break;
550			}
551	0		}
552			}
553			} else {
554	0		memset(result + index, '0', precision);
555	0		index += precision;
556			}
557	0		return index;
558			}
559
560	0		void d2fixed_buffered(double d, uint32_t precision, char* result) {
561	0		const int len = d2fixed_buffered_n(d, precision, result);
562	0		result[len] = '\0';
563	0		}
564
565	0		char* d2fixed(double d, uint32_t precision) {
566	0		char* const buffer = (char*)malloc(2000);
567	0		const int index = d2fixed_buffered_n(d, precision, buffer);
568	0		buffer[index] = '\0';
569	0		return buffer;
570			}
571
572
573
574	0		int d2exp_buffered_n(double d, uint32_t precision, char* result) {
575	0		const uint64_t bits = double_to_bits(d);
576			#ifdef RYU_DEBUG
577			int32_t bit;
578	0		printf("IN=");
579	0	0	for (bit = 63; bit >= 0; --bit) {
580	0		printf("%d", (int) ((bits >> bit) & 1));
581			}
582	0		printf("\n");
583			#endif
584
585			// Decode bits into sign, mantissa, and exponent.
586	0		const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
587	0		const uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
588	0		const uint32_t ieeeExponent = (uint32_t) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
589
590			// Case distinction; exit early for the easy cases.
591	0	0	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
592	0		return copy_special_str_printf(result, ieeeSign, ieeeMantissa);
593			}
594	0	0	if (ieeeExponent == 0 && ieeeMantissa == 0) {
		0
595	0		int index = 0;
596	0	0	if (ieeeSign) {
597	0		result[index++] = '-';
598			}
599	0		result[index++] = '0';
600	0	0	if (precision > 0) {
601	0		result[index++] = '.';
602	0		memset(result + index, '0', precision);
603	0		index += precision;
604			}
605	0		memcpy(result + index, "e+00", 4);
606	0		index += 4;
607	0		return index;
608			}
609
610			int32_t e2;
611			uint64_t m2;
612	0	0	if (ieeeExponent == 0) {
613	0		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
614	0		m2 = ieeeMantissa;
615			} else {
616	0		e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
617	0		m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
618			}
619
620			#ifdef RYU_DEBUG
621	0		printf("-> %" PRIu64 " * 2^%d\n", m2, e2);
622			#endif
623
624	0		const bool printDecimalPoint = precision > 0;
625	0		++precision;
626	0		int index = 0;
627	0	0	if (ieeeSign) {
628	0		result[index++] = '-';
629			}
630	0		uint32_t digits = 0;
631	0		uint32_t printedDigits = 0;
632	0		uint32_t availableDigits = 0;
633	0		int32_t exp = 0;
634			int32_t i;
635	0	0	if (e2 >= -52) {
636	0	0	const uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t) e2);
637	0		const uint32_t p10bits = pow10BitsForIndex(idx);
638	0		const int32_t len = (int32_t) lengthForIndex(idx);
639			#ifdef RYU_DEBUG
640	0		printf("idx=%u\n", idx);
641	0		printf("len=%d\n", len);
642			#endif
643	0	0	for (i = len - 1; i >= 0; --i) {
644	0		const uint32_t j = p10bits - e2;
645			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
646			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
647	0		digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT[POW10_OFFSET[idx] + i], (int32_t) (j + 8));
648	0	0	if (printedDigits != 0) {
649	0	0	if (printedDigits + 9 > precision) {
650	0		availableDigits = 9;
651	0		break;
652			}
653	0		append_nine_digits(digits, result + index);
654	0		index += 9;
655	0		printedDigits += 9;
656	0	0	} else if (digits != 0) {
657	0		availableDigits = decimalLength9(digits);
658	0		exp = i * 9 + (int32_t) availableDigits - 1;
659	0	0	if (availableDigits > precision) {
660	0		break;
661			}
662	0	0	if (printDecimalPoint) {
663	0		append_d_digits(availableDigits, digits, result + index);
664	0		index += availableDigits + 1; // +1 for decimal point
665			} else {
666	0		result[index++] = (char) ('0' + digits);
667			}
668	0		printedDigits = availableDigits;
669	0		availableDigits = 0;
670			}
671			}
672			}
673
674	0	0	if (e2 < 0 && availableDigits == 0) {
		0
675	0		const int32_t idx = -e2 / 16;
676			int32_t i;
677			#ifdef RYU_DEBUG
678	0		printf("idx=%d, e2=%d, min=%d\n", idx, e2, MIN_BLOCK_2[idx]);
679			#endif
680	0	0	for (i = MIN_BLOCK_2[idx]; i < 200; ++i) {
681	0		const int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx);
682	0		const uint32_t p = POW10_OFFSET_2[idx] + (uint32_t) i - MIN_BLOCK_2[idx];
683			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
684			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
685	0	0	digits = (p >= POW10_OFFSET_2[idx + 1]) ? 0 : mulShift_mod1e9(m2 << 8, POW10_SPLIT_2[p], j + 8);
686			#ifdef RYU_DEBUG
687	0		printf("exact=%" PRIu64 " * (%" PRIu64 " + %" PRIu64 " << 64) >> %d\n", m2, POW10_SPLIT_2[p][0], POW10_SPLIT_2[p][1], j);
688	0		printf("digits=%u\n", digits);
689			#endif
690	0	0	if (printedDigits != 0) {
691	0	0	if (printedDigits + 9 > precision) {
692	0		availableDigits = 9;
693	0		break;
694			}
695	0		append_nine_digits(digits, result + index);
696	0		index += 9;
697	0		printedDigits += 9;
698	0	0	} else if (digits != 0) {
699	0		availableDigits = decimalLength9(digits);
700	0		exp = -(i + 1) * 9 + (int32_t) availableDigits - 1;
701	0	0	if (availableDigits > precision) {
702	0		break;
703			}
704	0	0	if (printDecimalPoint) {
705	0		append_d_digits(availableDigits, digits, result + index);
706	0		index += availableDigits + 1; // +1 for decimal point
707			} else {
708	0		result[index++] = (char) ('0' + digits);
709			}
710	0		printedDigits = availableDigits;
711	0		availableDigits = 0;
712			}
713			}
714			}
715
716	0		const uint32_t maximum = precision - printedDigits;
717			#ifdef RYU_DEBUG
718	0		printf("availableDigits=%u\n", availableDigits);
719	0		printf("digits=%u\n", digits);
720	0		printf("maximum=%u\n", maximum);
721			#endif
722	0	0	if (availableDigits == 0) {
723	0		digits = 0;
724			}
725	0		uint32_t lastDigit = 0;
726			uint32_t k;
727	0	0	if (availableDigits > maximum) {
728	0	0	for (k = 0; k < availableDigits - maximum; ++k) {
729	0		lastDigit = digits % 10;
730	0		digits /= 10;
731			}
732			}
733			#ifdef RYU_DEBUG
734	0		printf("lastDigit=%u\n", lastDigit);
735			#endif
736			// 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
737	0		int roundUp = 0;
738	0	0	if (lastDigit != 5) {
739	0		roundUp = lastDigit > 5;
740			} else {
741			// Is m * 2^e2 * 10^(precision + 1 - exp) integer?
742			// precision was already increased by 1, so we don't need to write + 1 here.
743	0		const int32_t rexp = (int32_t) precision - exp;
744	0		const int32_t requiredTwos = -e2 - rexp;
745	0		bool trailingZeros = requiredTwos <= 0
746	0	0	|| (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t) requiredTwos));
		0
		0
747	0	0	if (rexp < 0) {
748	0		const int32_t requiredFives = -rexp;
749	0	0	trailingZeros = trailingZeros && multipleOfPowerOf5(m2, (uint32_t) requiredFives);
		0
750			}
751	0	0	roundUp = trailingZeros ? 2 : 1;
752			#ifdef RYU_DEBUG
753	0		printf("requiredTwos=%d\n", requiredTwos);
754	0	0	printf("trailingZeros=%s\n", trailingZeros ? "true" : "false");
755			#endif
756			}
757	0	0	if (printedDigits != 0) {
758	0	0	if (digits == 0) {
759	0		memset(result + index, '0', maximum);
760			} else {
761	0		append_c_digits(maximum, digits, result + index);
762			}
763	0		index += maximum;
764			} else {
765	0	0	if (printDecimalPoint) {
766	0		append_d_digits(maximum, digits, result + index);
767	0		index += maximum + 1; // +1 for decimal point
768			} else {
769	0		result[index++] = (char) ('0' + digits);
770			}
771			}
772			#ifdef RYU_DEBUG
773	0		printf("roundUp=%d\n", roundUp);
774			#endif
775	0	0	if (roundUp != 0) {
776	0		int roundIndex = index;
777			while (true) {
778	0		--roundIndex;
779			char c;
780	0	0	if (roundIndex == -1 || (c = result[roundIndex], c == '-')) {
		0
781	0		result[roundIndex + 1] = '1';
782	0		++exp;
783	0		break;
784			}
785	0	0	if (c == '.') {
786	0		continue;
787	0	0	} else if (c == '9') {
788	0		result[roundIndex] = '0';
789	0		roundUp = 1;
790	0		continue;
791			} else {
792	0	0	if (roundUp == 2 && c % 2 == 0) {
		0
793	0		break;
794			}
795	0		result[roundIndex] = c + 1;
796	0		break;
797			}
798	0		}
799			}
800	0		result[index++] = 'e';
801	0	0	if (exp < 0) {
802	0		result[index++] = '-';
803	0		exp = -exp;
804			} else {
805	0		result[index++] = '+';
806			}
807
808	0	0	if (exp >= 100) {
809	0		const int32_t c = exp % 10;
810	0		memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
811	0		result[index + 2] = (char) ('0' + c);
812	0		index += 3;
813			} else {
814	0		memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
815	0		index += 2;
816			}
817
818	0		return index;
819			}
820
821	0		void d2exp_buffered(double d, uint32_t precision, char* result) {
822	0		const int len = d2exp_buffered_n(d, precision, result);
823	0		result[len] = '\0';
824	0		}
825
826	0		char* d2exp(double d, uint32_t precision) {
827	0		char* const buffer = (char*)malloc(2000);
828	0		const int index = d2exp_buffered_n(d, precision, buffer);
829	0		buffer[index] = '\0';
830	0		return buffer;
831			}
832
833	1		int _has_uint128(void) { /* added by sisyphus: */
834			#if defined(HAS_UINT128)
835	1		return 1;
836			#else
837			return 0;
838			#endif
839			}
840