File Coverage

d2fixed.c

Criterion	Covered	Total	%
statement	280	384	72.9
branch	110	192	57.2
condition			n/a
subroutine			n/a
pod			n/a
total	390	576	67.7

line	stmt	bran	code
1			// Copyright 2018 Ulf Adams
2			//
3			// The contents of this file may be used under the terms of the Apache License,
4			// Version 2.0.
5			//
6			// (See accompanying file LICENSE-Apache or copy at
7			// http://www.apache.org/licenses/LICENSE-2.0)
8			//
9			// Alternatively, the contents of this file may be used under the terms of
10			// the Boost Software License, Version 1.0.
11			// (See accompanying file LICENSE-Boost or copy at
12			// https://www.boost.org/LICENSE_1_0.txt)
13			//
14			// Unless required by applicable law or agreed to in writing, this software
15			// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16			// KIND, either express or implied.
17
18			// Runtime compiler options:
19			// -DRYU_DEBUG Generate verbose debugging output to stdout.
20			//
21			// -DRYU_ONLY_64_BIT_OPS Avoid using uint128_t or 64-bit intrinsics. Slower,
22			// depending on your compiler.
23			//
24			// -DRYU_AVOID_UINT128 Avoid using uint128_t. Slower, depending on your compiler.
25
26			/* Sisyphus has applied some superficial changes to this file because perl has *
27			* not always honored "C99 mode". The location of the header files, relative *
28			* to the location of this file, has also changed */
29
30			#include "ryu_headers/ryu.h"
31
32			#include <assert.h>
33			#include <stdbool.h>
34			#include <stdint.h>
35			#include <stdlib.h>
36			#include <string.h>
37
38			#ifdef RYU_DEBUG
39			#include <inttypes.h>
40			#include <stdio.h>
41			#endif
42
43			#include "ryu_headers/common.h"
44			#include "ryu_headers/digit_table.h"
45			#include "ryu_headers/d2fixed_full_table.h"
46			#include "ryu_headers/d2s_intrinsics.h"
47
48			#define DOUBLE_MANTISSA_BITS 52
49			#define DOUBLE_EXPONENT_BITS 11
50			#define DOUBLE_BIAS 1023
51
52			#define POW10_ADDITIONAL_BITS 120
53
54			#if defined(HAS_UINT128)
55	535		static inline uint128_t umul256(const uint128_t a, const uint64_t bHi, const uint64_t bLo, uint128_t* const productHi) {
56	535		const uint64_t aLo = (uint64_t)a;
57	535		const uint64_t aHi = (uint64_t)(a >> 64);
58
59	535		const uint128_t b00 = (uint128_t)aLo * bLo;
60	535		const uint128_t b01 = (uint128_t)aLo * bHi;
61	535		const uint128_t b10 = (uint128_t)aHi * bLo;
62	535		const uint128_t b11 = (uint128_t)aHi * bHi;
63
64	535		const uint64_t b00Lo = (uint64_t)b00;
65	535		const uint64_t b00Hi = (uint64_t)(b00 >> 64);
66
67	535		const uint128_t mid1 = b10 + b00Hi;
68	535		const uint64_t mid1Lo = (uint64_t)(mid1);
69	535		const uint64_t mid1Hi = (uint64_t)(mid1 >> 64);
70
71	535		const uint128_t mid2 = b01 + mid1Lo;
72	535		const uint64_t mid2Lo = (uint64_t)(mid2);
73	535		const uint64_t mid2Hi = (uint64_t)(mid2 >> 64);
74
75	535		const uint128_t pHi = b11 + mid1Hi + mid2Hi;
76	535		const uint128_t pLo = ((uint128_t)mid2Lo << 64) | b00Lo;
77
78	535		*productHi = pHi;
79	535		return pLo;
80			}
81
82			// Returns the high 128 bits of the 256-bit product of a and b.
83	535		static inline uint128_t umul256_hi(const uint128_t a, const uint64_t bHi, const uint64_t bLo) {
84			// Reuse the umul256 implementation.
85			// Optimizers will likely eliminate the instructions used to compute the
86			// low part of the product.
87			uint128_t hi;
88	535		umul256(a, bHi, bLo, &hi);
89	535		return hi;
90			}
91
92			// Unfortunately, gcc/clang do not automatically turn a 128-bit integer division
93			// into a multiplication, so we have to do it manually.
94	535		static inline uint32_t uint128_mod1e9(const uint128_t v) {
95			// After multiplying, we're going to shift right by 29, then truncate to uint32_t.
96			// This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
97	535		const uint64_t multiplied = (uint64_t) umul256_hi(v, 0x89705F4136B4A597u, 0x31680A88F8953031u);
98
99			// For uint32_t truncation, see the mod1e9() comment in d2s_intrinsics.h.
100	535		const uint32_t shifted = (uint32_t) (multiplied >> 29);
101
102	535		return ((uint32_t) v) - 1000000000 * shifted;
103			}
104
105			// Best case: use 128-bit type.
106	535		static inline uint32_t mulShift_mod1e9(const uint64_t m, const uint64_t* const mul, const int32_t j) {
107	535		const uint128_t b0 = ((uint128_t) m) * mul[0]; // 0
108	535		const uint128_t b1 = ((uint128_t) m) * mul[1]; // 64
109	535		const uint128_t b2 = ((uint128_t) m) * mul[2]; // 128
110			#ifdef RYU_DEBUG
111			if (j < 128 || j > 180) {
112			printf("%d\n", j);
113			}
114			#endif
115	535	50	assert(j >= 128);
116	535	50	assert(j <= 180);
117			// j: [128, 256)
118	535		const uint128_t mid = b1 + (uint64_t) (b0 >> 64); // 64
119	535		const uint128_t s1 = b2 + (uint64_t) (mid >> 64); // 128
120	535		return uint128_mod1e9(s1 >> (j - 128));
121			}
122
123			#else // HAS_UINT128
124
125			#if defined(HAS_64_BIT_INTRINSICS)
126			// Returns the low 64 bits of the high 128 bits of the 256-bit product of a and b.
127			static inline uint64_t umul256_hi128_lo64(
128			const uint64_t aHi, const uint64_t aLo, const uint64_t bHi, const uint64_t bLo) {
129			uint64_t b00Hi;
130			const uint64_t b00Lo = umul128(aLo, bLo, &b00Hi);
131			uint64_t b01Hi;
132			const uint64_t b01Lo = umul128(aLo, bHi, &b01Hi);
133			uint64_t b10Hi;
134			const uint64_t b10Lo = umul128(aHi, bLo, &b10Hi);
135			uint64_t b11Hi;
136			const uint64_t b11Lo = umul128(aHi, bHi, &b11Hi);
137			(void) b00Lo; // unused
138			(void) b11Hi; // unused
139			const uint64_t temp1Lo = b10Lo + b00Hi;
140			const uint64_t temp1Hi = b10Hi + (temp1Lo < b10Lo);
141			const uint64_t temp2Lo = b01Lo + temp1Lo;
142			const uint64_t temp2Hi = b01Hi + (temp2Lo < b01Lo);
143			return b11Lo + temp1Hi + temp2Hi;
144			}
145
146			static inline uint32_t uint128_mod1e9(const uint64_t vHi, const uint64_t vLo) {
147			// After multiplying, we're going to shift right by 29, then truncate to uint32_t.
148			// This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
149			const uint64_t multiplied = umul256_hi128_lo64(vHi, vLo, 0x89705F4136B4A597u, 0x31680A88F8953031u);
150
151			// For uint32_t truncation, see the mod1e9() comment in d2s_intrinsics.h.
152			const uint32_t shifted = (uint32_t) (multiplied >> 29);
153
154			return ((uint32_t) vLo) - 1000000000 * shifted;
155			}
156			#endif // HAS_64_BIT_INTRINSICS
157
158			static inline uint32_t mulShift_mod1e9(const uint64_t m, const uint64_t* const mul, const int32_t j) {
159			uint64_t high0; // 64
160			const uint64_t low0 = umul128(m, mul[0], &high0); // 0
161			uint64_t high1; // 128
162			const uint64_t low1 = umul128(m, mul[1], &high1); // 64
163			uint64_t high2; // 192
164			const uint64_t low2 = umul128(m, mul[2], &high2); // 128
165			const uint64_t s0low = low0; // 0
166			(void) s0low; // unused
167			const uint64_t s0high = low1 + high0; // 64
168			const uint32_t c1 = s0high < low1;
169			const uint64_t s1low = low2 + high1 + c1; // 128
170			const uint32_t c2 = s1low < low2; // high1 + c1 can't overflow, so compare against low2
171			const uint64_t s1high = high2 + c2; // 192
172			#ifdef RYU_DEBUG
173			if (j < 128 || j > 180) {
174			printf("%d\n", j);
175			}
176			#endif
177			assert(j >= 128);
178			assert(j <= 180);
179			#if defined(HAS_64_BIT_INTRINSICS)
180			const uint32_t dist = (uint32_t) (j - 128); // dist: [0, 52]
181			const uint64_t shiftedhigh = s1high >> dist;
182			const uint64_t shiftedlow = shiftright128(s1low, s1high, dist);
183			return uint128_mod1e9(shiftedhigh, shiftedlow);
184			#else // HAS_64_BIT_INTRINSICS
185			if (j < 160) { // j: [128, 160)
186			const uint64_t r0 = mod1e9(s1high);
187			const uint64_t r1 = mod1e9((r0 << 32) | (s1low >> 32));
188			const uint64_t r2 = ((r1 << 32) | (s1low & 0xffffffff));
189			return mod1e9(r2 >> (j - 128));
190			} else { // j: [160, 192)
191			const uint64_t r0 = mod1e9(s1high);
192			const uint64_t r1 = ((r0 << 32) | (s1low >> 32));
193			return mod1e9(r1 >> (j - 160));
194			}
195			#endif // HAS_64_BIT_INTRINSICS
196			}
197			#endif // HAS_UINT128
198
199			// Convert `digits` to a sequence of decimal digits. Append the digits to the result.
200			// The caller has to guarantee that:
201			// 10^(olength-1) <= digits < 10^olength
202			// e.g., by passing `olength` as `decimalLength9(digits)`.
203	10		static inline void append_n_digits(const uint32_t olength, uint32_t digits, char* const result) {
204			#ifdef RYU_DEBUG
205			printf("DIGITS=%u\n", digits);
206			#endif
207
208	10		uint32_t i = 0;
209	10	50	while (digits >= 10000) {
210			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
211			const uint32_t c = digits - 10000 * (digits / 10000);
212			#else
213	0		const uint32_t c = digits % 10000;
214			#endif
215	0		digits /= 10000;
216	0		const uint32_t c0 = (c % 100) << 1;
217	0		const uint32_t c1 = (c / 100) << 1;
218	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c0, 2);
219	0		memcpy(result + olength - i - 4, DIGIT_TABLE + c1, 2);
220	0		i += 4;
221			}
222	10	50	if (digits >= 100) {
223	0		const uint32_t c = (digits % 100) << 1;
224	0		digits /= 100;
225	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c, 2);
226	0		i += 2;
227			}
228	10	50	if (digits >= 10) {
229	0		const uint32_t c = digits << 1;
230	0		memcpy(result + olength - i - 2, DIGIT_TABLE + c, 2);
231			} else {
232	10		result[0] = (char) ('0' + digits);
233			}
234	10		}
235
236			// Convert `digits` to a sequence of decimal digits. Print the first digit, followed by a decimal
237			// dot '.' followed by the remaining digits. The caller has to guarantee that:
238			// 10^(olength-1) <= digits < 10^olength
239			// e.g., by passing `olength` as `decimalLength9(digits)`.
240	35		static inline void append_d_digits(const uint32_t olength, uint32_t digits, char* const result) {
241			#ifdef RYU_DEBUG
242			printf("DIGITS=%u\n", digits);
243			#endif
244
245	35		uint32_t i = 0;
246	65	100	while (digits >= 10000) {
247			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
248			const uint32_t c = digits - 10000 * (digits / 10000);
249			#else
250	30		const uint32_t c = digits % 10000;
251			#endif
252	30		digits /= 10000;
253	30		const uint32_t c0 = (c % 100) << 1;
254	30		const uint32_t c1 = (c / 100) << 1;
255	30		memcpy(result + olength + 1 - i - 2, DIGIT_TABLE + c0, 2);
256	30		memcpy(result + olength + 1 - i - 4, DIGIT_TABLE + c1, 2);
257	30		i += 4;
258			}
259	35	50	if (digits >= 100) {
260	0		const uint32_t c = (digits % 100) << 1;
261	0		digits /= 100;
262	0		memcpy(result + olength + 1 - i - 2, DIGIT_TABLE + c, 2);
263	0		i += 2;
264			}
265	35	50	if (digits >= 10) {
266	0		const uint32_t c = digits << 1;
267	0		result[2] = DIGIT_TABLE[c + 1];
268	0		result[1] = '.';
269	0		result[0] = DIGIT_TABLE[c];
270			} else {
271	35		result[1] = '.';
272	35		result[0] = (char) ('0' + digits);
273			}
274	35		}
275
276			// Convert `digits` to decimal and write the last `count` decimal digits to result.
277			// If `digits` contains additional digits, then those are silently ignored.
278	25		static inline void append_c_digits(const uint32_t count, uint32_t digits, char* const result) {
279			#ifdef RYU_DEBUG
280			printf("DIGITS=%u\n", digits);
281			#endif
282			// Copy pairs of digits from DIGIT_TABLE.
283	25		uint32_t i = 0;
284	40	100	for (; i < count - 1; i += 2) {
285	15		const uint32_t c = (digits % 100) << 1;
286	15		digits /= 100;
287	15		memcpy(result + count - i - 2, DIGIT_TABLE + c, 2);
288			}
289			// Generate the last digit if count is odd.
290	25	100	if (i < count) {
291	10		const char c = (char) ('0' + (digits % 10));
292	10		result[count - i - 1] = c;
293			}
294	25		}
295
296			// Convert `digits` to decimal and write the last 9 decimal digits to result.
297			// If `digits` contains additional digits, then those are silently ignored.
298	415		static inline void append_nine_digits(uint32_t digits, char* const result) {
299			uint32_t i;
300			#ifdef RYU_DEBUG
301			printf("DIGITS=%u\n", digits);
302			#endif
303	415	100	if (digits == 0) {
304	45		memset(result, '0', 9);
305	45		return;
306			}
307
308	1110	100	for (i = 0; i < 5; i += 4) {
309			#ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217
310			const uint32_t c = digits - 10000 * (digits / 10000);
311			#else
312	740		const uint32_t c = digits % 10000;
313			#endif
314	740		digits /= 10000;
315	740		const uint32_t c0 = (c % 100) << 1;
316	740		const uint32_t c1 = (c / 100) << 1;
317	740		memcpy(result + 7 - i, DIGIT_TABLE + c0, 2);
318	740		memcpy(result + 5 - i, DIGIT_TABLE + c1, 2);
319			}
320	370		result[0] = (char) ('0' + digits);
321			}
322
323	0		static inline uint32_t indexForExponent(const uint32_t e) {
324	0		return (e + 15) / 16;
325			}
326
327	20		static inline uint32_t pow10BitsForIndex(const uint32_t idx) {
328	20		return 16 * idx + POW10_ADDITIONAL_BITS;
329			}
330
331	20		static inline uint32_t lengthForIndex(const uint32_t idx) {
332			// +1 for ceil, +16 for mantissa, +8 to round up when dividing by 9
333	20		return (log10Pow2(16 * (int32_t) idx) + 1 + 16 + 8) / 9;
334			}
335
336	0		static inline int copy_special_str_printf(char* const result, const bool sign, const uint64_t mantissa) {
337			#if defined(_MSC_VER)
338			// TODO: Check that -nan is expected output on Windows.
339			if (sign) {
340			result[0] = '-';
341			}
342			if (mantissa) {
343			if (mantissa < (1ull << (DOUBLE_MANTISSA_BITS - 1))) {
344			memcpy(result + sign, "nan(snan)", 9);
345			return sign + 9;
346			}
347			memcpy(result + sign, "nan", 3);
348			return sign + 3;
349			}
350			#else
351	0	0	if (mantissa) {
352	0		memcpy(result, "nan", 3);
353	0		return 3;
354			}
355	0	0	if (sign) {
356	0		result[0] = '-';
357			}
358			#endif
359	0		memcpy(result + sign, "Infinity", 8);
360	0		return sign + 8;
361			}
362
363	35		int d2fixed_buffered_n(double d, uint32_t precision, char* result) {
364	35		const uint64_t bits = double_to_bits(d);
365			#ifdef RYU_DEBUG
366			printf("IN=");
367			for (int32_t bit = 63; bit >= 0; --bit) {
368			printf("%d", (int) ((bits >> bit) & 1));
369			}
370			printf("\n");
371			#endif
372
373			// Decode bits into sign, mantissa, and exponent.
374	35		const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
375	35		const uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
376	35		const uint32_t ieeeExponent = (uint32_t) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
377
378			// Case distinction; exit early for the easy cases.
379	35	50	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
380	0		return copy_special_str_printf(result, ieeeSign, ieeeMantissa);
381			}
382	35	100	if (ieeeExponent == 0 && ieeeMantissa == 0) {
		50
383	0		int index = 0;
384	0	0	if (ieeeSign) {
385	0		result[index++] = '-';
386			}
387	0		result[index++] = '0';
388	0	0	if (precision > 0) {
389	0		result[index++] = '.';
390	0		memset(result + index, '0', precision);
391	0		index += precision;
392			}
393	0		return index;
394			}
395
396			int32_t e2;
397			int32_t i;
398			uint64_t m2;
399	35	100	if (ieeeExponent == 0) {
400	10		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
401	10		m2 = ieeeMantissa;
402			} else {
403	25		e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
404	25		m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
405			}
406
407			#ifdef RYU_DEBUG
408			printf("-> %" PRIu64 " * 2^%d\n", m2, e2);
409			#endif
410
411	35		int index = 0;
412	35		bool nonzero = false;
413	35	100	if (ieeeSign) {
414	10		result[index++] = '-';
415			}
416	35	100	if (e2 >= -52) {
417	10	50	const uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t) e2);
418	10		const uint32_t p10bits = pow10BitsForIndex(idx);
419	10		const int32_t len = (int32_t) lengthForIndex(idx);
420			#ifdef RYU_DEBUG
421			printf("idx=%u\n", idx);
422			printf("len=%d\n", len);
423			#endif
424	30	100	for (i = len - 1; i >= 0; --i) {
425	20		const uint32_t j = p10bits - e2;
426			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
427			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
428	20		const uint32_t digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT[POW10_OFFSET[idx] + i], (int32_t) (j + 8));
429	20	50	if (nonzero) {
430	0		append_nine_digits(digits, result + index);
431	0		index += 9;
432	20	100	} else if (digits != 0) {
433	10		const uint32_t olength = decimalLength9(digits);
434	10		append_n_digits(olength, digits, result + index);
435	10		index += olength;
436	10		nonzero = true;
437			}
438			}
439			}
440	35	100	if (!nonzero) {
441	25		result[index++] = '0';
442			}
443	35	50	if (precision > 0) {
444	35		result[index++] = '.';
445			}
446			#ifdef RYU_DEBUG
447			printf("e2=%d\n", e2);
448			#endif
449	35	50	if (e2 < 0) {
450	35		const int32_t idx = -e2 / 16;
451			#ifdef RYU_DEBUG
452			printf("idx=%d\n", idx);
453			#endif
454	35		const uint32_t blocks = precision / 9 + 1;
455			// 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
456	35		int roundUp = 0;
457	35		uint32_t i = 0;
458	35	50	if (blocks <= MIN_BLOCK_2[idx]) {
459	0		i = blocks;
460	0		memset(result + index, '0', precision);
461	0		index += precision;
462	35	100	} else if (i < MIN_BLOCK_2[idx]) {
463	10		i = MIN_BLOCK_2[idx];
464	10		memset(result + index, '0', 9 * i);
465	10		index += 9 * i;
466			}
467	255	50	for (; i < blocks; ++i) {
468	255		const int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx);
469	255		const uint32_t p = POW10_OFFSET_2[idx] + i - MIN_BLOCK_2[idx];
470	255	100	if (p >= POW10_OFFSET_2[idx + 1]) {
471			// If the remaining digits are all 0, then we might as well use memset.
472			// No rounding required in this case.
473	25		const uint32_t fill = precision - 9 * i;
474	25		memset(result + index, '0', fill);
475	25		index += fill;
476	25		break;
477			}
478			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
479			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
480	230		uint32_t digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT_2[p], j + 8);
481			uint32_t k;
482			#ifdef RYU_DEBUG
483			printf("digits=%u\n", digits);
484			#endif
485	230	100	if (i < blocks - 1) {
486	220		append_nine_digits(digits, result + index);
487	220		index += 9;
488			} else {
489	10		const uint32_t maximum = precision - 9 * i;
490	10		uint32_t lastDigit = 0;
491	100	100	for (k = 0; k < 9 - maximum; ++k) {
492	90		lastDigit = digits % 10;
493	90		digits /= 10;
494			}
495			#ifdef RYU_DEBUG
496			printf("lastDigit=%u\n", lastDigit);
497			#endif
498	10	50	if (lastDigit != 5) {
499	10		roundUp = lastDigit > 5;
500			} else {
501			// Is m * 10^(additionalDigits + 1) / 2^(-e2) integer?
502	0		const int32_t requiredTwos = -e2 - (int32_t) precision - 1;
503	0		const bool trailingZeros = requiredTwos <= 0
504	0	0	|| (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t) requiredTwos));
		0
		0
505	0	0	roundUp = trailingZeros ? 2 : 1;
506			#ifdef RYU_DEBUG
507			printf("requiredTwos=%d\n", requiredTwos);
508			printf("trailingZeros=%s\n", trailingZeros ? "true" : "false");
509			#endif
510			}
511	10	50	if (maximum > 0) {
512	0		append_c_digits(maximum, digits, result + index);
513	0		index += maximum;
514			}
515	10		break;
516			}
517			}
518			#ifdef RYU_DEBUG
519			printf("roundUp=%d\n", roundUp);
520			#endif
521	35	100	if (roundUp != 0) {
522	10		int roundIndex = index;
523	10		int dotIndex = 0; // '.' can't be located at index 0
524			while (true) {
525	10		--roundIndex;
526			char c;
527	10	50	if (roundIndex == -1 || (c = result[roundIndex], c == '-')) {
		50
528	0		result[roundIndex + 1] = '1';
529	0	0	if (dotIndex > 0) {
530	0		result[dotIndex] = '0';
531	0		result[dotIndex + 1] = '.';
532			}
533	0		result[index++] = '0';
534	0		break;
535			}
536	10	50	if (c == '.') {
537	0		dotIndex = roundIndex;
538	0		continue;
539	10	50	} else if (c == '9') {
540	0		result[roundIndex] = '0';
541	0		roundUp = 1;
542	0		continue;
543			} else {
544	10	50	if (roundUp == 2 && c % 2 == 0) {
		0
545	0		break;
546			}
547	10		result[roundIndex] = c + 1;
548	10		break;
549			}
550	35		}
551			}
552			} else {
553	0		memset(result + index, '0', precision);
554	0		index += precision;
555			}
556	35		return index;
557			}
558
559	7		void d2fixed_buffered(double d, uint32_t precision, char* result) {
560	7		const int len = d2fixed_buffered_n(d, precision, result);
561	7		result[len] = '\0';
562	7		}
563
564	21		char* d2fixed(double d, uint32_t precision) {
565	21		char* const buffer = (char*)malloc(2000);
566	21		const int index = d2fixed_buffered_n(d, precision, buffer);
567	21		buffer[index] = '\0';
568	21		return buffer;
569			}
570
571
572
573	35		int d2exp_buffered_n(double d, uint32_t precision, char* result) {
574	35		const uint64_t bits = double_to_bits(d);
575			#ifdef RYU_DEBUG
576			printf("IN=");
577			for (int32_t bit = 63; bit >= 0; --bit) {
578			printf("%d", (int) ((bits >> bit) & 1));
579			}
580			printf("\n");
581			#endif
582
583			// Decode bits into sign, mantissa, and exponent.
584	35		const bool ieeeSign = ((bits >> (DOUBLE_MANTISSA_BITS + DOUBLE_EXPONENT_BITS)) & 1) != 0;
585	35		const uint64_t ieeeMantissa = bits & ((1ull << DOUBLE_MANTISSA_BITS) - 1);
586	35		const uint32_t ieeeExponent = (uint32_t) ((bits >> DOUBLE_MANTISSA_BITS) & ((1u << DOUBLE_EXPONENT_BITS) - 1));
587
588			// Case distinction; exit early for the easy cases.
589	35	50	if (ieeeExponent == ((1u << DOUBLE_EXPONENT_BITS) - 1u)) {
590	0		return copy_special_str_printf(result, ieeeSign, ieeeMantissa);
591			}
592	35	100	if (ieeeExponent == 0 && ieeeMantissa == 0) {
		50
593	0		int index = 0;
594	0	0	if (ieeeSign) {
595	0		result[index++] = '-';
596			}
597	0		result[index++] = '0';
598	0	0	if (precision > 0) {
599	0		result[index++] = '.';
600	0		memset(result + index, '0', precision);
601	0		index += precision;
602			}
603	0		memcpy(result + index, "e+00", 4);
604	0		index += 4;
605	0		return index;
606			}
607
608			int32_t e2;
609			uint64_t m2;
610	35	100	if (ieeeExponent == 0) {
611	10		e2 = 1 - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
612	10		m2 = ieeeMantissa;
613			} else {
614	25		e2 = (int32_t) ieeeExponent - DOUBLE_BIAS - DOUBLE_MANTISSA_BITS;
615	25		m2 = (1ull << DOUBLE_MANTISSA_BITS) | ieeeMantissa;
616			}
617
618			#ifdef RYU_DEBUG
619			printf("-> %" PRIu64 " * 2^%d\n", m2, e2);
620			#endif
621
622	35		const bool printDecimalPoint = precision > 0;
623	35		++precision;
624	35		int index = 0;
625	35	100	if (ieeeSign) {
626	10		result[index++] = '-';
627			}
628	35		uint32_t digits = 0;
629	35		uint32_t printedDigits = 0;
630	35		uint32_t availableDigits = 0;
631	35		int32_t exp = 0;
632			int32_t i;
633	35	100	if (e2 >= -52) {
634	10	50	const uint32_t idx = e2 < 0 ? 0 : indexForExponent((uint32_t) e2);
635	10		const uint32_t p10bits = pow10BitsForIndex(idx);
636	10		const int32_t len = (int32_t) lengthForIndex(idx);
637			#ifdef RYU_DEBUG
638			printf("idx=%u\n", idx);
639			printf("len=%d\n", len);
640			#endif
641	30	100	for (i = len - 1; i >= 0; --i) {
642	20		const uint32_t j = p10bits - e2;
643			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
644			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
645	20		digits = mulShift_mod1e9(m2 << 8, POW10_SPLIT[POW10_OFFSET[idx] + i], (int32_t) (j + 8));
646	20	50	if (printedDigits != 0) {
647	0	0	if (printedDigits + 9 > precision) {
648	0		availableDigits = 9;
649	0		break;
650			}
651	0		append_nine_digits(digits, result + index);
652	0		index += 9;
653	0		printedDigits += 9;
654	20	100	} else if (digits != 0) {
655	10		availableDigits = decimalLength9(digits);
656	10		exp = i * 9 + (int32_t) availableDigits - 1;
657	10	50	if (availableDigits > precision) {
658	0		break;
659			}
660	10	50	if (printDecimalPoint) {
661	10		append_d_digits(availableDigits, digits, result + index);
662	10		index += availableDigits + 1; // +1 for decimal point
663			} else {
664	0		result[index++] = (char) ('0' + digits);
665			}
666	10		printedDigits = availableDigits;
667	10		availableDigits = 0;
668			}
669			}
670			}
671
672	35	50	if (e2 < 0 && availableDigits == 0) {
		50
673	35		const int32_t idx = -e2 / 16;
674			int32_t i;
675			#ifdef RYU_DEBUG
676			printf("idx=%d, e2=%d, min=%d\n", idx, e2, MIN_BLOCK_2[idx]);
677			#endif
678	265	50	for (i = MIN_BLOCK_2[idx]; i < 200; ++i) {
679	265		const int32_t j = ADDITIONAL_BITS_2 + (-e2 - 16 * idx);
680	265		const uint32_t p = POW10_OFFSET_2[idx] + (uint32_t) i - MIN_BLOCK_2[idx];
681			// Temporary: j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
682			// a slightly faster code path in mulShift_mod1e9. Instead, we can just increase the multipliers.
683	265	50	digits = (p >= POW10_OFFSET_2[idx + 1]) ? 0 : mulShift_mod1e9(m2 << 8, POW10_SPLIT_2[p], j + 8);
684			#ifdef RYU_DEBUG
685			printf("exact=%" PRIu64 " * (%" PRIu64 " + %" PRIu64 " << 64) >> %d\n", m2, POW10_SPLIT_2[p][0], POW10_SPLIT_2[p][1], j);
686			printf("digits=%u\n", digits);
687			#endif
688	265	100	if (printedDigits != 0) {
689	230	100	if (printedDigits + 9 > precision) {
690	35		availableDigits = 9;
691	35		break;
692			}
693	195		append_nine_digits(digits, result + index);
694	195		index += 9;
695	195		printedDigits += 9;
696	35	100	} else if (digits != 0) {
697	25		availableDigits = decimalLength9(digits);
698	25		exp = -(i + 1) * 9 + (int32_t) availableDigits - 1;
699	25	50	if (availableDigits > precision) {
700	0		break;
701			}
702	25	50	if (printDecimalPoint) {
703	25		append_d_digits(availableDigits, digits, result + index);
704	25		index += availableDigits + 1; // +1 for decimal point
705			} else {
706	0		result[index++] = (char) ('0' + digits);
707			}
708	25		printedDigits = availableDigits;
709	25		availableDigits = 0;
710			}
711			}
712			}
713
714	35		const uint32_t maximum = precision - printedDigits;
715			#ifdef RYU_DEBUG
716			printf("availableDigits=%u\n", availableDigits);
717			printf("digits=%u\n", digits);
718			printf("maximum=%u\n", maximum);
719			#endif
720	35	50	if (availableDigits == 0) {
721	0		digits = 0;
722			}
723	35		uint32_t lastDigit = 0;
724			uint32_t k;
725	35	50	if (availableDigits > maximum) {
726	300	100	for (k = 0; k < availableDigits - maximum; ++k) {
727	265		lastDigit = digits % 10;
728	265		digits /= 10;
729			}
730			}
731			#ifdef RYU_DEBUG
732			printf("lastDigit=%u\n", lastDigit);
733			#endif
734			// 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
735	35		int roundUp = 0;
736	35	100	if (lastDigit != 5) {
737	25		roundUp = lastDigit > 5;
738			} else {
739			// Is m * 2^e2 * 10^(precision + 1 - exp) integer?
740			// precision was already increased by 1, so we don't need to write + 1 here.
741	10		const int32_t rexp = (int32_t) precision - exp;
742	10		const int32_t requiredTwos = -e2 - rexp;
743	10		bool trailingZeros = requiredTwos <= 0
744	10	50	|| (requiredTwos < 60 && multipleOfPowerOf2(m2, (uint32_t) requiredTwos));
		50
		0
745	10	50	if (rexp < 0) {
746	0		const int32_t requiredFives = -rexp;
747	0	0	trailingZeros = trailingZeros && multipleOfPowerOf5(m2, (uint32_t) requiredFives);
		0
748			}
749	10	50	roundUp = trailingZeros ? 2 : 1;
750			#ifdef RYU_DEBUG
751			printf("requiredTwos=%d\n", requiredTwos);
752			printf("trailingZeros=%s\n", trailingZeros ? "true" : "false");
753			#endif
754			}
755	35	50	if (printedDigits != 0) {
756	35	100	if (digits == 0) {
757	10		memset(result + index, '0', maximum);
758			} else {
759	25		append_c_digits(maximum, digits, result + index);
760			}
761	35		index += maximum;
762			} else {
763	0	0	if (printDecimalPoint) {
764	0		append_d_digits(maximum, digits, result + index);
765	0		index += maximum + 1; // +1 for decimal point
766			} else {
767	0		result[index++] = (char) ('0' + digits);
768			}
769			}
770			#ifdef RYU_DEBUG
771			printf("roundUp=%d\n", roundUp);
772			#endif
773	35	100	if (roundUp != 0) {
774	10		int roundIndex = index;
775			while (true) {
776	10		--roundIndex;
777			char c;
778	10	50	if (roundIndex == -1 || (c = result[roundIndex], c == '-')) {
		50
779	0		result[roundIndex + 1] = '1';
780	0		++exp;
781	0		break;
782			}
783	10	50	if (c == '.') {
784	0		continue;
785	10	50	} else if (c == '9') {
786	0		result[roundIndex] = '0';
787	0		roundUp = 1;
788	0		continue;
789			} else {
790	10	50	if (roundUp == 2 && c % 2 == 0) {
		0
791	0		break;
792			}
793	10		result[roundIndex] = c + 1;
794	10		break;
795			}
796	0		}
797			}
798	35		result[index++] = 'e';
799	35	100	if (exp < 0) {
800	25		result[index++] = '-';
801	25		exp = -exp;
802			} else {
803	10		result[index++] = '+';
804			}
805
806	35	100	if (exp >= 100) {
807	10		const int32_t c = exp % 10;
808	10		memcpy(result + index, DIGIT_TABLE + 2 * (exp / 10), 2);
809	10		result[index + 2] = (char) ('0' + c);
810	10		index += 3;
811			} else {
812	25		memcpy(result + index, DIGIT_TABLE + 2 * exp, 2);
813	25		index += 2;
814			}
815
816	35		return index;
817			}
818
819	7		void d2exp_buffered(double d, uint32_t precision, char* result) {
820	7		const int len = d2exp_buffered_n(d, precision, result);
821	7		result[len] = '\0';
822	7		}
823
824	21		char* d2exp(double d, uint32_t precision) {
825	21		char* const buffer = (char*)malloc(2000);
826	21		const int index = d2exp_buffered_n(d, precision, buffer);
827	21		buffer[index] = '\0';
828	21		return buffer;
829			}