[Python-checkins] r83813 - in python/branches/py3k-dtoa: Lib/test/test_strtod.py Python/dtoa.c
mark.dickinson
python-checkins at python.org
Sun Aug 8 11:57:27 CEST 2010
Author: mark.dickinson
Date: Sun Aug 8 11:57:27 2010
New Revision: 83813
Log:
Issue #9009: Rewrite _Py_dg_strtod. See the issue tracker for details.
Modified:
python/branches/py3k-dtoa/Lib/test/test_strtod.py
python/branches/py3k-dtoa/Python/dtoa.c
Modified: python/branches/py3k-dtoa/Lib/test/test_strtod.py
==============================================================================
--- python/branches/py3k-dtoa/Lib/test/test_strtod.py (original)
+++ python/branches/py3k-dtoa/Lib/test/test_strtod.py Sun Aug 8 11:57:27 2010
@@ -82,7 +82,7 @@
hexdigs,
e + 4*hexdigs)
-TEST_SIZE = 1000
+TEST_SIZE = 10
class StrtodTests(unittest.TestCase):
def check_strtod(self, s):
@@ -457,14 +457,42 @@
'168444365510704342711559699508093042880177904174497792.001',
# 1 - 2**-54, +-tiny
'999999999999999944488848768742172978818416595458984375e-54',
- '9999999999999999444888487687421729788184165954589843749999999e-54',
- '9999999999999999444888487687421729788184165954589843750000001e-54',
+ '999999999999999944488848768742172978818416595458984' #...
+ '3749999999e-54',
+ '999999999999999944488848768742172978818416595458984' #...
+ '3750000001e-54',
+ # Values near underflow-to-zero boundary; these ended up
+ # as 0.0 instead of -0.0 (or vice versa) with some
+ # nonstandard rounding modes.
+ '-20727364621e-334',
+ '247032822920623272088284396434110686182529901307162' #...
+ '38221279284125033775362990e-400',
]
for s in test_strings:
self.check_strtod(s)
+
+def test_with_rounding_mode(mode):
+ class StrtodTestsWithRounding(StrtodTests):
+ def setUp(self):
+ StrtodTests.setUp(self)
+ self.old_mode = float.__getround__()
+ float.__setround__(mode)
+
+ def tearDown(self):
+ float.__setround__(self.old_mode)
+ StrtodTests.tearDown(self)
+
+ StrtodTestsWithRounding.__name__ = "StrtodTestsWith{}Rounding".format(
+ mode.capitalize())
+ return StrtodTestsWithRounding
+
def test_main():
- test.support.run_unittest(StrtodTests)
+ test_classes = [StrtodTests]
+ if hasattr(float, '__setround__'):
+ for mode in "upward", "downward", "towardzero", "tonearest":
+ test_classes.append(test_with_rounding_mode(mode))
+ test.support.run_unittest(*test_classes)
if __name__ == "__main__":
test_main()
Modified: python/branches/py3k-dtoa/Python/dtoa.c
==============================================================================
--- python/branches/py3k-dtoa/Python/dtoa.c (original)
+++ python/branches/py3k-dtoa/Python/dtoa.c Sun Aug 8 11:57:27 2010
@@ -185,20 +185,23 @@
extern "C" {
#endif
-typedef union { double d; ULong L[2]; } U;
+typedef union { ULong L[2]; double d; } U;
#ifdef IEEE_8087
#define word0(x) (x)->L[1]
#define word1(x) (x)->L[0]
+#define Uint(hi, lo) {{lo, hi}}
#else
#define word0(x) (x)->L[0]
#define word1(x) (x)->L[1]
+#define Uint(hi, lo) {{hi, lo}}
#endif
#define dval(x) (x)->d
-#ifndef STRTOD_DIGLIM
-#define STRTOD_DIGLIM 40
-#endif
+/* STRTOD_DIGLIM = 17 + 8. 17 is the number of decimal digits required to
+ distinguish doubles; the extra 8 digits ensure that truncation to
+ STRTOD_DIGLIM digits induces an error of at most 1e-8 ulps. */
+#define STRTOD_DIGLIM 25
/* maximum permitted exponent value for strtod; exponents larger than
MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP
@@ -254,6 +257,13 @@
#define Quick_max 14
#define Int_max 14
+/* Some double constants, defined via the union U to avoid the chicken-and-egg
+ problem of relying on the compiler to do correctly-rounded string->double
+ conversions. */
+static U Dbl_min = Uint((Bias + Emin)*Exp_msk1, 0); /* 2.0 ** Emin */
+static U Exp4P = Uint((Bias + 2*P)*Exp_msk1, 0); /* 2.0**(2*P) */
+static U Inf = Uint(Exp_mask, 0);
+
#ifndef Flt_Rounds
#ifdef FLT_ROUNDS
#define Flt_Rounds FLT_ROUNDS
@@ -970,21 +980,6 @@
return c;
}
-/* Given a positive normal double x, return the difference between x and the
- next double up. Doesn't give correct results for subnormals. */
-
-static double
-ulp(U *x)
-{
- Long L;
- U u;
-
- L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1;
- word0(&u) = L;
- word1(&u) = 0;
- return dval(&u);
-}
-
/* Convert a Bigint a to a double giving the value a / 2**(32 * a->wds).
Error < 0.75 ulps. This function is currently used only by ratio. */
@@ -1155,14 +1150,8 @@
1e20, 1e21, 1e22
};
-static const double
-bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
-static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128,
- 9007199254740992.*9007199254740992.e-256
- /* = 2^106 * 1e-256 */
-};
-/* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */
-/* flag unnecessarily. It leads to a song and dance at the end of strtod. */
+static const double bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 };
+static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, 1e-256 };
#define Scale_Bit 0x10
#define n_bigtens 5
@@ -1279,27 +1268,144 @@
return q;
}
-/* sulp(x) is a version of ulp(x) that takes bc.scale into account.
-
- Assuming that x is finite and nonnegative (positive zero is fine
- here) and x / 2^bc.scale is exactly representable as a double,
- sulp(x) is equivalent to 2^bc.scale * ulp(x / 2^bc.scale). */
+/* Round a finite double x to the nearest integer (in double format), rounding
+ * ties away from zero. This function is unaffected by FPU rounding mode. */
static double
-sulp(U *x, BCinfo *bc)
+rnd(double x)
{
U u;
+ int exp;
+ dval(&u) = x;
- if (bc->scale && 2*P + 1 > (int)((word0(x) & Exp_mask) >> Exp_shift)) {
- /* rv/2^bc->scale is subnormal */
- word0(&u) = (P+2)*Exp_msk1;
+ /* Extract unbiased exponent. */
+ exp = (int)((word0(&u) & Exp_mask) >> Exp_shift) - Bias;
+ if (exp < 0) {
+ /* Absolute value of x is at most 1.0; result is +-0.0 if exp < -1,
+ and +-1.0 if exp == -1. */
+ word0(&u) &= Sign_bit;
+ if (exp == -1)
+ word0(&u) |= Exp_1;
word1(&u) = 0;
- return u.d;
}
- else {
- assert(word0(x) || word1(x)); /* x != 0.0 */
- return ulp(x);
+ else if (exp <= P - 34) {
+ /* Absolute value of x is in [1.0, 2**(P-33)). The bit with value 0.5
+ is bit (P - 34 - exp) in the upper word. */
+ ULong mask = 1UL << ((P - 34) - exp);
+ /* Zero out all bits with value < 0.5. */
+ word1(&u) = 0;
+ word0(&u) &= -mask;
+ /* If bit with value 0.5 is set, round up. */
+ if (word0(&u) & mask)
+ word0(&u) += mask;
+ }
+ else if (exp <= P - 2) {
+ /* Absolute value of x is in [2**(P-33), 2**(P-1)). The bit with
+ value 0.5 is bit (P - 2 - exp) in the lower word. */
+ ULong mask = 1UL << ((P - 2) - exp);
+ word1(&u) &= -mask;
+ if (word1(&u) & mask) {
+ word1(&u) += mask;
+ if (word1(&u) < mask)
+ /* word1 addition overflowed; add carry into word0. */
+ ++word0(&u);
+ }
}
+ /* If exp >= P-1 then abs(x) >= 2**(P-1) and x is already
+ an integer, so there's nothing to do. */
+ return dval(&u);
+}
+
+/* In strtod, the current estimate is stored in a pair (x, scale) consisting
+ of a double x and an int scale, representing the value x / 2**scale.
+
+ For a finite double x, sulp(x) returns 1 ulp(x), taking scale into account.
+ That is, sulp(x, scale) / 2**scale == ulp(x / 2**scale), where ulp(x) is the
+ difference between abs(x) and the next largest double.
+
+ Logic in the strtod function ensures that the pair (x, scale) always
+ satisfies the following two conditions:
+
+ (0) x / 2**scale is an integral multiple of 2**(Emin - P + 1). In other
+ words, if x / 2**scale is at most DBL_MAX then it's exactly
+ representable as a double.
+
+ (1) Either x / 2**(P-1) >= 2**Emin, or scale >= P - 1.
+
+ It follows from these conditions that on input, x is necessarily either
+ normal or zero, and that sulp(x, scale) is always positive and normal.
+ Moreover, if x is zero then scale >= P - 1.
+
+*/
+
+static double
+sulp(U *x, int scale)
+{
+ U u;
+ int e;
+
+ /* Method: if 2**k <= abs(x / 2**scale) < 2**(k+1) then
+
+ ulp(x / 2**scale) == max(2**Etiny, 2**k / 2**(P - 1)).
+
+ We compute
+
+ e = max(scale + 1, k + Bias + scale)
+
+ Here k + Bias + scale corresponds exactly to the value given by the
+ exponent bits of x. From the conditions above, e > P - 1. Setting u =
+ 2**(e - Bias - P + 1) gives
+
+ u / 2**scale == 2**(e - Bias - P + 1 - scale)
+ == 2**max(2 - P - Bias, k - P + 1)
+ == max(2**Etiny, 2**k / 2**(P - 1)).
+ == ulp(x / 2**scale).
+
+ The code below also works, unaltered, with x == +-0.0,
+ returning u such that u / 2**scale == 2**Etiny.
+ */
+
+ e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+ if (e < scale + 1)
+ e = scale + 1;
+
+ assert(e > P - 1);
+ word0(&u) = (ULong)(e - P + 1) << Exp_shift;
+ word1(&u) = 0;
+ return dval(&u);
+}
+
+/* Given a scaled double (as used in _Py_dg_strtod), find the next largest
+ boundary, using the same scale. If we're already on a boundary, return the
+ next one up.
+
+ A *boundary* is either 0.0, or a power of 2 that's at least 2**(Emin + 1);
+ 2**Emin is not considered a boundary, because the spacing of consecutive
+ floats does not change at 2**Emin. */
+
+double next_boundary(U *x, int scale) {
+ U u;
+ int e;
+
+ e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+
+ if (e < scale + 1)
+ e = scale + 1;
+ word0(&u) = (ULong)(e + 1) << Exp_shift;
+ word1(&u) = 0;
+ return dval(&u);
+}
+
+double last_boundary(U *x, int scale) {
+ U u;
+ int e;
+
+ e = (int)((word0(x) & Exp_mask) >> Exp_shift);
+ if (e <= scale + 1)
+ e = 0;
+ word0(&u) = (ULong)e << Exp_shift;
+ word1(&u) = 0;
+ return dval(&u);
}
/* parse_numeric_string: Parse and validate a finite numeric string.
@@ -1655,553 +1761,332 @@
Bfree(b);
Bfree(d);
if (dd > 0 || (dd == 0 && odd))
- dval(rv) += sulp(rv, bc);
+ dval(rv) += sulp(rv, bc->scale);
return 0;
}
+/* Get current FPU rounding mode; same values as for FLT_ROUNDS
+ -1 for indeterminate
+ 0 for rounding towards zero
+ 1 for rounding towards nearest (with halves rounded towards even)
+ 2 for rounding upward (towards positive infinity)
+ 3 for rounding downward (towards negative infinity)
+*/
+
+static int
+get_rounding_mode(void)
+{
+ /* this may not work on all systems; we should try a configure-time
+ test. */
+ return FLT_ROUNDS;
+}
+
double
_Py_dg_strtod(const char *s00, char **se)
{
- int bb2, bb5, bbe, bd2, bd5, bs2, dsign, e, e1, error, i, j, k, odd, sign;
- Py_ssize_t nd, nd0, pos;
+ int bbe, dsign, e, e1, e2, e5, error, i, k, scale, sign;
+ Py_ssize_t nd, nd0;
char *s0;
- double aadj, aadj1;
- U aadj2, adj, rv, rv0;
- ULong y, z;
- Long L;
+ double aadj, aadj_int, scalefac, ulp;
+ U rv;
+ Bigint *bb, *bd, *bs, *delta;
BCinfo bc;
- Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
-
- dval(&rv) = 0.;
error = parse_numeric_string(s00, se, &s0, &nd, &nd0, &e, &sign);
if (error)
goto parse_error;
/* If all digits were zero, exit with return value +-0.0. */
- if (!nd)
+ if (!nd) {
+ dval(&rv) = 0.0;
goto ret;
+ }
- /* Overflow and underflow. */
+ /* Catch cases of obvious overflow and underflow. */
if (e > Big_10_exp)
goto ovfl;
else if (e <= Tiny_10_exp)
goto undfl;
- /* Initial approximation based on first DBL_DIG+1 digits of the input. */
- k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1;
- for (pos = 0; pos < k; pos++)
- dval(&rv) = 10.0 * dval(&rv) + (s0[pos < nd0 ? pos : pos + 1] - '0');
+ /* Compute an initial approximation of the form rv * 10 ** e1, where rv is
+ * the integer obtained from the first (up to) DBL_DIG digits of the
+ * input. */
+ k = nd <= DBL_DIG ? nd : DBL_DIG;
+ dval(&rv) = 0.0;
+ for (i = 0; i < k; i++)
+ dval(&rv) = 10.0 * dval(&rv) + (s0[i < nd0 ? i : i + 1] - '0');
e1 = e - k;
- /* If there are at most Dbl_dig significant digits in the input, then rv
- is exact and there's a chance to compute the exact result with a single
- floating-point multiplication or division. */
- if (nd <= DBL_DIG) {
- if (!e1)
- goto ret;
- if (e1 > 0) {
- if (e1 <= Ten_pmax) {
- dval(&rv) *= tens[e1];
- goto ret;
+ /* If the input has at most DBL_DIG significant digits (that is, nd <=
+ * DBL_DIG), then the represented value is exactly +/- rv * 10 ** e1. In
+ * this case, if e1 is small enough *and* the FPU rounding mode matches the
+ * one we want, then we can compute a correctly rounded result with a
+ * single floating-point multiplication or division. */
+ if (get_rounding_mode() == 1) {
+ if (nd <= DBL_DIG) {
+ if (e1 >= 0) {
+ if (e1 <= Ten_pmax) {
+ dval(&rv) *= tens[e1];
+ goto ret;
+ }
+ i = DBL_DIG - nd;
+ if (e1 - i <= Ten_pmax) {
+ dval(&rv) *= tens[i];
+ dval(&rv) *= tens[e1 - i];
+ goto ret;
+ }
}
- i = DBL_DIG - nd;
- if (e1 - i <= Ten_pmax) {
- /* A fancier test would sometimes let us do
- * this for larger i values.
- */
- dval(&rv) *= tens[i];
- dval(&rv) *= tens[e1 - i];
+ else if (-e1 <= Ten_pmax) {
+ dval(&rv) /= tens[-e1];
goto ret;
}
}
- else if (-e1 <= Ten_pmax) {
- dval(&rv) /= tens[-e1];
- goto ret;
- }
}
- bc.scale = 0;
-
- /* Get starting approximation = rv * 10**e1 */
+ /* The fast path above wasn't applicable, so at this point we have to do
+ * things the hard way. Here's an outline of the strategy.
+ *
+ * (1) Compute a double approximation 'rv' to the true value of the
+ * input. There are various sources of error in this approximation:
+ * it's based on the first (at most) DBL_DIG significant digits of the
+ * input, as computed above, so there's a truncation error involved; the
+ * floating-point arithmetic used to compute the approximation will also
+ * contribute error. 1000 ulps is a safe upper bound for the magnitude
+ * of the error, though it will be much smaller in most cases.
+ *
+ * (2) Use Bigint arithmetic to compute an approximation 'aadj' to ulps
+ * difference between rv and the true value. Again there are sources of
+ * error here: instead of computing with the true value, we compute with
+ * a value truncated to at most STRTOD_DIGLIM digits; this introduces an
+ * absolute error of at most 1e-8 to aadj. Rounding error may
+ * contribute a further absolute error of 1e-10.
+ *
+ * (3) Adjust rv based on the ulps difference computed above. In most
+ * cases, the result will be unambiguous and we can return after this
+ * step. But:
+ *
+ * (4) If the ulps difference value computed in (2) indicates that the
+ * true value lies very nearly halfway between two representable
+ * doubles, then because of the uncertainty in aadj itself we need to
+ * more work. In this case the decision is passed off to the 'bigcomp'
+ * function, which does a comparison with the original (not truncated)
+ * input value in order to figure out which way to round.
+ *
+ * If the value represented by the input is close to the limits of the
+ * range of the double type (i.e., either very large or very small), then
+ * it's convenient to using a scaling factor 'scalefac == 2**scale' to
+ * ensure that rv is normal and not too close to the overflow boundary;
+ * thus our approximation will actually be rv / 2**scale (or rv / scalefac)
+ * rather than rv itself.
+ */
+ /* Get starting approximation, rv * 10**e1. For small inputs, rv is
+ * scaled by a factor of 2**(2*P); that is, we compute the value rv *
+ * 10**e1 * 2**(2*P). For large inputs, we scale in the opposite
+ * direction, computing rv * 10**e1 / 2**(2*P). */
+ scale = 0;
+ scalefac = 1.0;
if (e1 > 0) {
- if ((i = e1 & 15))
- dval(&rv) *= tens[i];
- if (e1 &= ~15) {
- if (e1 > DBL_MAX_10_EXP)
- goto ovfl;
- e1 >>= 4;
- for(j = 0; e1 > 1; j++, e1 >>= 1)
- if (e1 & 1)
- dval(&rv) *= bigtens[j];
- /* The last multiplication could overflow. */
- word0(&rv) -= P*Exp_msk1;
- dval(&rv) *= bigtens[j];
- if ((z = word0(&rv) & Exp_mask)
- > Exp_msk1*(DBL_MAX_EXP+Bias-P))
- goto ovfl;
- if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) {
- /* set to largest number */
- /* (Can't trust DBL_MAX) */
- word0(&rv) = Big0;
- word1(&rv) = Big1;
- }
- else
- word0(&rv) += P*Exp_msk1;
- }
+ dval(&rv) *= tens[e1 & 15];
+ e1 >>= 4;
+ if (e1 & Scale_Bit) {
+ scale = -2*P;
+ scalefac = 1.0 / dval(&Exp4P);
+ dval(&rv) *= scalefac;
+ }
+ for(i = 0; e1 > 0; i++, e1 >>= 1)
+ if (e1 & 1)
+ dval(&rv) *= bigtens[i];
}
else if (e1 < 0) {
- /* The input decimal value lies in [10**e1, 10**(e1+16)).
-
- If e1 <= -512, underflow immediately.
- If e1 <= -256, set bc.scale to 2*P.
+ e1 = -e1;
+ dval(&rv) /= tens[e1 & 15];
+ e1 >>= 4;
+ if (e1 & Scale_Bit) {
+ scale = 2*P;
+ scalefac = dval(&Exp4P);
+ dval(&rv) *= scalefac;
+ }
+ for(i = 0; e1 > 0; i++, e1 >>= 1)
+ if (e1 & 1)
+ dval(&rv) *= tinytens[i];
+ }
+
+ /* Some inequalities:
+
+ -324 < e <= 309 (so 10**-324 <= dv < 10**309).
+ 1 <= k <= DBL_DIG (= 15).
+ e = e1 + k (using the value of e1 before entering the above loops)
+ scale > 0 iff e1 <= -256, and scale < 0 iff e1 >= 256
+
+ From these we deduce:
+
+ If scale > 0 then 1e-324 <= dv < 1e-241.
+ If scale == 0 then 1e-255 <= dv < 1e+270.
+ If scale < 0 then 1e+256 <= dv < 1e+309.
- So for input value < 1e-256, bc.scale is always set;
- for input value >= 1e-240, bc.scale is never set.
- For input values in [1e-256, 1e-240), bc.scale may or may
- not be set. */
+ */
- e1 = -e1;
- if ((i = e1 & 15))
- dval(&rv) /= tens[i];
- if (e1 >>= 4) {
- if (e1 >= 1 << n_bigtens)
- goto undfl;
- if (e1 & Scale_Bit)
- bc.scale = 2*P;
- for(j = 0; e1 > 0; j++, e1 >>= 1)
- if (e1 & 1)
- dval(&rv) *= tinytens[j];
- if (bc.scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask)
- >> Exp_shift)) > 0) {
- /* scaled rv is denormal; clear j low bits */
- if (j >= 32) {
- word1(&rv) = 0;
- if (j >= 53)
- word0(&rv) = (P+2)*Exp_msk1;
- else
- word0(&rv) &= 0xffffffff << (j-32);
- }
- else
- word1(&rv) &= 0xffffffff << j;
- }
- if (!dval(&rv))
- goto undfl;
- }
+ /* Ensure that rv / 2**scale is exactly representable. This is an issue
+ * only if scale > 0 and rv / 2**scale is subnormal: rv itself will be
+ * normal in this case, so will have a full 53-bit significand, but rv /
+ * 2**scale won't have that many bits available.
+ *
+ * Evil trickery ahead: We want to round rv to an exact multiple of
+ * 2**scale * DBL_MIN * 2**(1 - P). Rather than doing this by hand, we
+ * persuade the FPU to do the rounding for us: assuming that rv < 2**scale
+ * DBL_MIN, we add 2**scale * DBL_MIN to rv. The result is in the binade
+ * [2**scale * DBL_MIN, 2 * 2**scale * DBL_MIN], and the representable
+ * floating-point numbers in this binade are exactly the multiples of
+ * 2**scale * DBL_MIN * 2**(1 - P), hence the result of the addition is the
+ * exact sum rounded to the nearest multiple of 2**scale * DBL_MIN *
+ * 2**(1-P). Subtracting 2**scale * DBL_MIN again gives the rounded rv
+ * value.
+ *
+ * Note that at this point rv is still just an approximation, so we don't
+ * care about discrepancies due to rounding mode.
+ */
+ if (scale > 0 && dval(&rv) < dval(&Dbl_min) * scalefac) {
+ dval(&rv) += dval(&Dbl_min) * scalefac;
+ dval(&rv) -= dval(&Dbl_min) * scalefac;
}
- /* Now the hard part -- adjusting rv to the correct value.*/
+ /* We now have our starting approximation rv / 2**scale. Note that it's
+ * not possible for rv to be *strictly* negative at this point, though it
+ * might be -0.0; this is dealt with towards the end of the function.
+ */
+ assert(dval(&rv) >= 0.0);
- /* Put digits into bd: true value = bd * 10^e */
+ /* Step (2): Compute 'aadj', an (approximation to) ulps difference between
+ * the true value (after truncation to STRTOD_DIGLIM significant digits)
+ * and the current approximation rv / 2**scale.
+ *
+ * From this point onwards, all floating-point operations on rv that are
+ * performed have an exactly representable result. This ensures that the
+ * FPU rounding mode can't affect the result.
+ */
- bc.e0 = e;
- bc.nd = nd;
- bc.nd0 = nd0; /* Only needed if nd > STRTOD_DIGLIM, but done here */
- /* to silence an erroneous warning about bc.nd0 */
- /* possibly not being initialized. */
- if (nd > STRTOD_DIGLIM) {
- /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */
- /* minimum number of decimal digits to distinguish double values */
- /* in IEEE arithmetic. */
+ /* Notation used in the comments: 'srv' is rv / 2**scale; 'tdv' is the
+ value represented by the input (after truncation); 'ulp' is ulp(srv).
+ We're aiming to compute (srv - tdv) / ulp. We do this by using integer
+ arithmetic to compute (M*srv - M*tdv) / (M*ulp) for a suitable scale
+ factor M for which M*srv, M*tdv and M*ulp are all integral. */
+
+ k = nd < STRTOD_DIGLIM ? nd : STRTOD_DIGLIM;
+ bd = s2b(s0, nd0, k); /* tdv == bd * 10**(e - k). */
+ bb = sd2b(&rv, scale, &bbe); /* srv == bb * 2**bbe. */
+ bs = i2b(1); /* ulp == 2**bbe. */
+ if (bd == NULL || bb == NULL || bs == NULL)
+ goto failed_malloc;
- /* Truncate input to 18 significant digits, then discard any trailing
- zeros on the result. */
- nd = 18;
- while (nd > 0 && s0[nd - 1 < nd0 ? nd - 1 : nd] == '0')
- nd--;
+ /* srv, tdv and ulp are proportional to bb, bd * 2**(e-k-bbe)*5**(e-k) and
+ bs, respectively. Scale bb, bd, bs by the appropriate powers of 5... */
+ e5 = e - k;
+ if (e5 > 0)
+ bd = pow5mult(bd, e5);
+ else if (e5 < 0) {
+ bb = pow5mult(bb, -e5);
+ bs = pow5mult(bs, -e5);
}
- bd0 = s2b(s0, nd0, nd);
- if (bd0 == NULL)
+ if (bd == NULL || bb == NULL || bs == NULL)
goto failed_malloc;
- /* Notation for the comments below. Write:
-
- - dv for the absolute value of the number represented by the original
- decimal input string.
-
- - if we've truncated dv, write tdv for the truncated value.
- Otherwise, set tdv == dv.
-
- - srv for the quantity rv/2^bc.scale; so srv is the current binary
- approximation to tdv (and dv). It should be exactly representable
- in an IEEE 754 double.
- */
-
- for(;;) {
-
- /* This is the main correction loop for _Py_dg_strtod.
-
- We've got a decimal value tdv, and a floating-point approximation
- srv=rv/2^bc.scale to tdv. The aim is to determine whether srv is
- close enough (i.e., within 0.5 ulps) to tdv, and to compute a new
- approximation if not.
-
- To determine whether srv is close enough to tdv, compute integers
- bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv)
- respectively, and then use integer arithmetic to determine whether
- |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv).
- */
-
- bd = Balloc(bd0->k);
- if (bd == NULL) {
- Bfree(bd0);
- goto failed_malloc;
- }
- Bcopy(bd, bd0);
- bb = sd2b(&rv, bc.scale, &bbe); /* srv = bb * 2^bbe */
- if (bb == NULL) {
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- /* Record whether lsb of bb is odd, in case we need this
- for the round-to-even step later. */
- odd = bb->x[0] & 1;
-
- /* tdv = bd * 10**e; srv = bb * 2**bbe */
- bs = i2b(1);
- if (bs == NULL) {
- Bfree(bb);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
-
- if (e >= nd) {
- bb2 = bb5 = 0;
- bd2 = bd5 = e - nd;
- }
- else {
- bb2 = bb5 = nd - e;
- bd2 = bd5 = 0;
- }
- if (bbe >= 0)
- bb2 += bbe;
- else
- bd2 -= bbe;
- bs2 = bb2;
- bb2++;
- bd2++;
-
- /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
- and bs == 1, so:
-
- tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
- srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
- 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
-
- It follows that:
-
- M * tdv = bd * 2**bd2 * 5**bd5
- M * srv = bb * 2**bb2 * 5**bb5
- M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
-
- for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
- this fact is not needed below.)
- */
-
- /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */
- i = bb2 < bd2 ? bb2 : bd2;
- if (i > bs2)
- i = bs2;
- if (i > 0) {
- bb2 -= i;
- bd2 -= i;
- bs2 -= i;
- }
-
- /* Scale bb, bd, bs by the appropriate powers of 2 and 5. */
- if (bb5 > 0) {
- bs = pow5mult(bs, bb5);
- if (bs == NULL) {
- Bfree(bb);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- bb1 = mult(bs, bb);
- Bfree(bb);
- bb = bb1;
- if (bb == NULL) {
- Bfree(bs);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- }
- if (bb2 > 0) {
- bb = lshift(bb, bb2);
- if (bb == NULL) {
- Bfree(bs);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- }
- if (bd5 > 0) {
- bd = pow5mult(bd, bd5);
- if (bd == NULL) {
- Bfree(bb);
- Bfree(bs);
- Bfree(bd0);
- goto failed_malloc;
- }
- }
- if (bd2 > 0) {
- bd = lshift(bd, bd2);
- if (bd == NULL) {
- Bfree(bb);
- Bfree(bs);
- Bfree(bd0);
- goto failed_malloc;
- }
- }
- if (bs2 > 0) {
- bs = lshift(bs, bs2);
- if (bs == NULL) {
- Bfree(bb);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- }
-
- /* Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv),
- respectively. Compute the difference |tdv - srv|, and compare
- with 0.5 ulp(srv). */
-
- delta = diff(bb, bd);
- if (delta == NULL) {
- Bfree(bb);
- Bfree(bs);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- dsign = delta->sign;
- delta->sign = 0;
- i = cmp(delta, bs);
- if (bc.nd > nd && i <= 0) {
- if (dsign)
- break; /* Must use bigcomp(). */
-
- /* Here rv overestimates the truncated decimal value by at most
- 0.5 ulp(rv). Hence rv either overestimates the true decimal
- value by <= 0.5 ulp(rv), or underestimates it by some small
- amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of
- the true decimal value, so it's possible to exit.
-
- Exception: if scaled rv is a normal exact power of 2, but not
- DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the
- next double, so the correctly rounded result is either rv - 0.5
- ulp(rv) or rv; in this case, use bigcomp to distinguish. */
-
- if (!word1(&rv) && !(word0(&rv) & Bndry_mask)) {
- /* rv can't be 0, since it's an overestimate for some
- nonzero value. So rv is a normal power of 2. */
- j = (int)(word0(&rv) & Exp_mask) >> Exp_shift;
- /* rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if
- rv / 2^bc.scale >= 2^-1021. */
- if (j - bc.scale >= 2) {
- dval(&rv) -= 0.5 * sulp(&rv, &bc);
- break; /* Use bigcomp. */
- }
- }
+ /* ... and powers of 2. */
+ e2 = e - k - bbe;
+ if (e2 > 0)
+ bd = lshift(bd, e2);
+ else if (e2 < 0) {
+ bb = lshift(bb, -e2);
+ bs = lshift(bs, -e2);
+ }
+ if (bd == NULL || bb == NULL || bs == NULL)
+ goto failed_malloc;
- {
- bc.nd = nd;
- i = -1; /* Discarded digits make delta smaller. */
- }
- }
+ /* Now srv, tdv and ulp are proportional to bb, bd, and bs, respectively.
+ * Compute the quotient aadj = (srv - tdv) / ulp = (bb - bd) / bs, as a
+ * double. */
+ delta = diff(bb, bd);
+ dsign = delta->sign;
+ if (delta == NULL)
+ goto failed_malloc;
+ aadj = ratio(delta, bs);
+ Bfree(delta);
+ if (dsign)
+ aadj = -aadj;
+ Bfree(bd);
+ Bfree(bb);
+ Bfree(bs);
- if (i < 0) {
- /* Error is less than half an ulp -- check for
- * special case of mantissa a power of two.
- */
- if (dsign || word1(&rv) || word0(&rv) & Bndry_mask
- || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1
- ) {
- break;
- }
- if (!delta->x[0] && delta->wds <= 1) {
- /* exact result */
- break;
- }
- delta = lshift(delta,Log2P);
- if (delta == NULL) {
- Bfree(bb);
- Bfree(bs);
- Bfree(bd);
- Bfree(bd0);
- goto failed_malloc;
- }
- if (cmp(delta, bs) > 0)
- goto drop_down;
- break;
- }
- if (i == 0) {
- /* exactly half-way between */
- if (dsign) {
- if ((word0(&rv) & Bndry_mask1) == Bndry_mask1
- && word1(&rv) == (
- (bc.scale &&
- (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1) ?
- (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
- 0xffffffff)) {
- /*boundary case -- increment exponent*/
- word0(&rv) = (word0(&rv) & Exp_mask)
- + Exp_msk1
- ;
- word1(&rv) = 0;
- dsign = 0;
- break;
- }
- }
- else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) {
- drop_down:
- /* boundary case -- decrement exponent */
- if (bc.scale) {
- L = word0(&rv) & Exp_mask;
- if (L <= (2*P+1)*Exp_msk1) {
- if (L > (P+2)*Exp_msk1)
- /* round even ==> */
- /* accept rv */
- break;
- /* rv = smallest denormal */
- if (bc.nd > nd)
- break;
- goto undfl;
- }
- }
- L = (word0(&rv) & Exp_mask) - Exp_msk1;
- word0(&rv) = L | Bndry_mask1;
- word1(&rv) = 0xffffffff;
- break;
- }
- if (!odd)
- break;
- if (dsign)
- dval(&rv) += sulp(&rv, &bc);
- else {
- dval(&rv) -= sulp(&rv, &bc);
- if (!dval(&rv)) {
- if (bc.nd >nd)
- break;
- goto undfl;
- }
- }
- dsign = 1 - dsign;
- break;
- }
- if ((aadj = ratio(delta, bs)) <= 2.) {
- if (dsign)
- aadj = aadj1 = 1.;
- else if (word1(&rv) || word0(&rv) & Bndry_mask) {
- if (word1(&rv) == Tiny1 && !word0(&rv)) {
- if (bc.nd >nd)
- break;
- goto undfl;
- }
- aadj = 1.;
- aadj1 = -1.;
- }
- else {
- /* special case -- power of FLT_RADIX to be */
- /* rounded down... */
+ /* Step (3): Adjust rv using the computed value of aadj. In most cases the
+ correct result is unambiguous; in hard cases we move on to step (4). */
- if (aadj < 2./FLT_RADIX)
- aadj = 1./FLT_RADIX;
- else
- aadj *= 0.5;
- aadj1 = -aadj;
- }
- }
- else {
+ ulp = sulp(&rv, scale);
+ /* Invariant: here and after the adjustment, srv ~= tdv + ulp * aadj. */
+ if (aadj <= 0.0) {
+ /* next = ulps from next boundary up to rv (will be negative) */
+ double next = (dval(&rv) - next_boundary(&rv, scale))/ulp;
+ if (aadj <= next) {
+ /* Adjustment takes us past a power of 2 boundary, going up. */
aadj *= 0.5;
- aadj1 = dsign ? aadj : -aadj;
- if (Flt_Rounds == 0)
- aadj1 += 0.5;
- }
- y = word0(&rv) & Exp_mask;
-
- /* Check for overflow */
-
- if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) {
- dval(&rv0) = dval(&rv);
- word0(&rv) -= P*Exp_msk1;
- adj.d = aadj1 * ulp(&rv);
- dval(&rv) += adj.d;
- if ((word0(&rv) & Exp_mask) >=
- Exp_msk1*(DBL_MAX_EXP+Bias-P)) {
- if (word0(&rv0) == Big0 && word1(&rv0) == Big1) {
- Bfree(bb);
- Bfree(bd);
- Bfree(bs);
- Bfree(bd0);
- Bfree(delta);
- goto ovfl;
- }
- word0(&rv) = Big0;
- word1(&rv) = Big1;
- goto cont;
- }
- else
- word0(&rv) += P*Exp_msk1;
+ next *= 0.5;
+ ulp *= 2.0;
+ /* Round to integer if next is even, else to half-odd-integer. */
+ aadj_int = next + rnd(aadj - next);
}
else {
- if (bc.scale && y <= 2*P*Exp_msk1) {
- if (aadj <= 0x7fffffff) {
- if ((z = (ULong)aadj) <= 0)
- z = 1;
- aadj = z;
- aadj1 = dsign ? aadj : -aadj;
- }
- dval(&aadj2) = aadj1;
- word0(&aadj2) += (2*P+1)*Exp_msk1 - y;
- aadj1 = dval(&aadj2);
- }
- adj.d = aadj1 * ulp(&rv);
- dval(&rv) += adj.d;
- }
- z = word0(&rv) & Exp_mask;
- if (bc.nd == nd) {
- if (!bc.scale)
- if (y == z) {
- /* Can we stop now? */
- L = (Long)aadj;
- aadj -= L;
- /* The tolerances below are conservative. */
- if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) {
- if (aadj < .4999999 || aadj > .5000001)
- break;
- }
- else if (aadj < .4999999/FLT_RADIX)
- break;
- }
+ /* Usual case. */
+ aadj_int = rnd(aadj);
}
- cont:
- Bfree(bb);
- Bfree(bd);
- Bfree(bs);
- Bfree(delta);
}
- Bfree(bb);
- Bfree(bd);
- Bfree(bs);
- Bfree(bd0);
- Bfree(delta);
- if (bc.nd > nd) {
+ else {
+ double last = (dval(&rv) - last_boundary(&rv, scale))/ulp;
+ if (aadj > last && last_boundary(&rv, scale) != 0.0) {
+ /* Adjustment takes us past a power of 2 boundary, going down. */
+ aadj *= 2.0;
+ ulp *= 0.5;
+ }
+ aadj_int = rnd(aadj);
+ }
+
+ /* Adjust, and compute residual error. */
+ dval(&rv) -= aadj_int * ulp;
+ aadj -= aadj_int;
+
+ /* If we're in a near halfway case, use bigcomp to figure out
+ the correctly-rounded value */
+ assert(aadj > -0.5000001 && aadj < 0.5000001);
+ if (aadj < -0.4999999 || aadj > 0.4999999) {
+ /* input to bigcomp should be the lower of the two possible results */
+ if (aadj > 0.0) {
+ dval(&rv) -= ulp;
+ /* aadj += 1.0; */ /* Necessary only to maintain invariant; not
+ * used beyond this point. */
+ }
+ bc.nd0 = nd0;
+ bc.nd = nd;
+ bc.e0 = e;
+ bc.scale = scale;
error = bigcomp(&rv, s0, &bc);
if (error)
goto failed_malloc;
}
- if (bc.scale) {
- word0(&rv0) = Exp_1 - 2*P*Exp_msk1;
- word1(&rv0) = 0;
- dval(&rv) *= dval(&rv0);
- }
+ /* Under some rounding modes (particularly FE_DOWNWARD), it's possible for
+ * dval(&rv) to be -0.0 at this point; we want to get rid of the negative
+ * sign. */
+ if (word0(&rv) & Sign_bit)
+ word0(&rv) &= ~Sign_bit;
+
+ /* Do underflow and overflow checks, and undo the effects of scaling (if
+ * applicable). */
+ if (word0(&rv) == 0 && word1(&rv) == 0)
+ goto undfl;
+ if ((int)((word0(&rv) & Exp_mask) >> Exp_shift) - scale > Emax + Bias)
+ goto ovfl;
+
+ /* Undo the effects of the scaling, if applicable. */
+ if (scale)
+ dval(&rv) /= scalefac;
ret:
return sign ? -dval(&rv) : dval(&rv);
@@ -2210,6 +2095,9 @@
return 0.0;
failed_malloc:
+ if (bd != NULL) Bfree(bd);
+ if (bb != NULL) Bfree(bb);
+ if (bs != NULL) Bfree(bs);
errno = ENOMEM;
return -1.0;
@@ -2218,9 +2106,7 @@
ovfl:
errno = ERANGE;
- /* Can't trust HUGE_VAL */
- word0(&rv) = Exp_mask;
- word1(&rv) = 0;
+ dval(&rv) = dval(&Inf);
return sign ? -dval(&rv) : dval(&rv);
}
More information about the Python-checkins
mailing list