[Python-checkins] r82079 - python/branches/py3k-dtoa/Python/dtoa.c
mark.dickinson
python-checkins at python.org
Fri Jun 18 23:16:20 CEST 2010
Author: mark.dickinson
Date: Fri Jun 18 23:16:20 2010
New Revision: 82079
Log:
Pull out strtod parsing code into a separate function 'parse_numeric_string'
Modified:
python/branches/py3k-dtoa/Python/dtoa.c
Modified: python/branches/py3k-dtoa/Python/dtoa.c
==============================================================================
--- python/branches/py3k-dtoa/Python/dtoa.c (original)
+++ python/branches/py3k-dtoa/Python/dtoa.c Fri Jun 18 23:16:20 2010
@@ -242,6 +242,8 @@
#define Frac_mask 0xfffff
#define Frac_mask1 0xfffff
#define Ten_pmax 22
+#define Big_10_exp 309 /* Values >= 10**Big_10_exp overflow. */
+#define Tiny_10_exp -324 /* Values < 10**Tiny_10_exp underflow to zero. */
#define Bletch 0x10
#define Bndry_mask 0xfffff
#define Bndry_mask1 0xfffff
@@ -270,7 +272,8 @@
typedef struct BCinfo BCinfo;
struct
BCinfo {
- int e0, nd, nd0, scale;
+ Py_ssize_t nd, nd0;
+ int e0, scale;
};
#define FFFFFFFF 0xffffffffUL
@@ -479,13 +482,13 @@
failure. Assumes that nd >= 1 and that the first digit is nonzero. */
static Bigint *
-s2b(const char *s0, int nd0, int nd)
+s2b(const char *s0, Py_ssize_t nd0, Py_ssize_t nd)
{
Bigint *b;
- int i, k;
- Long x, y;
+ int k;
+ Py_ssize_t i, x, y;
- x = (nd + 8) / 9;
+ x = 1 + (nd - 1) / 9;
for (k = 0, y = 1; x > y; y <<= 1, k++) ;
b = Balloc(k);
if (b == NULL)
@@ -1318,6 +1321,212 @@
}
}
+/* parse_numeric_string: Parse and validate a finite numeric string.
+
+ Inputs:
+
+ A NUL-terminated string s00 with an initial portion that represents a
+ finite decimal value. Valid numeric strings are described by the
+ following pseudo-grammar:
+
+ sign = '+' | '-'
+ digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+ point = '.'
+ indicator = 'e' | 'E'
+ digits = digit [digits]
+ significand = digits [point [digits]] | point digits
+ exponent = indicator [sign] digits
+ numeric-string = [sign] significand [exponent]
+
+ NaNs, infinities and hex literals are not accepted, and leading
+ whitespace is not permitted.
+
+ Outputs:
+
+ Any nonzero finite decimal value as above can be uniquely expressed in
+ the form
+
+ (-1)**sign * 0.<digits> * 10**exp
+
+ where sign is 0 or 1, exp is an integer, and <digits> is a string of one
+ ore more digits with both the first and last digit in the string nonzero.
+ The following return values from parse_numeric_string provide access to
+ these values:
+
+ *sign gives the sign of the input: 1 for negative, 0 for positive.
+
+ *pnd is the length of <digits>
+
+ *ps0 is a pointer into the input string s00, and *pnd0 is an integer;
+ their values are such that for each i in the range 0 <= i < *pnd, the
+ ith digit <digits>[i] can be retrieved as:
+
+ (*ps0)[i < *pnd0 ? i : i + 1]
+
+ [ In the current implementation, *ps0 points just past the last
+ leading zero in the input string, so will either point to a nonzero
+ digit or to the decimal point. *pnd0 gives the position of the
+ decimal point, or of the end of the digit string if no decimal point
+ is present, relative to ps0. Note that *pnd0 may be larger than *pnd,
+ or smaller than 0. ]
+
+ *exp is the exponent in the formula above, clipped to lie in the range
+ [INT_MIN, INT_MAX].
+
+ For an input representing zero, *pnd will be 0.
+
+ *se points at the first character past the largest initial sequence
+ of s00 that gives a valid numeric string. If the entire string
+ s00 is a valid numeric string, *se will point to its terminating NUL
+ character. If no initial portion of s00 gives a valid numeric
+ string then *se will be equal to s00.
+
+ Returns 0 for a successful parse, and a nonzero value on failure.
+*/
+
+static int
+parse_numeric_string(const char *s00, char **se, char **ps0,
+ Py_ssize_t *pnd, Py_ssize_t *pnd0,
+ int *exp, int *sign)
+{
+ char c;
+ const char *s, *s0, *s1;
+ int esign, rv;
+ unsigned int dig;
+ Py_ssize_t e, nd, nd0, nz;
+ size_t abs_exp;
+
+ s = s00;
+ c = *s;
+
+ /* Parse optional sign, if present. */
+ *sign = 0;
+ switch (c) {
+ case '-':
+ *sign = 1;
+ /* no break */
+ case '+':
+ c = *++s;
+ }
+
+ /* Skip (and count) leading zeros. */
+ s0 = s;
+ while (c == '0')
+ c = *++s;
+ nz = s - s0;
+
+ /* Parse any remaining digits before the point. */
+ while ('0' <= c && c <= '9')
+ c = *++s;
+ nd0 = nd = s - s0;
+
+ /* Parse decimal point and following digits. */
+ if (c == '.') {
+ c = *++s;
+ s1 = s;
+ /* If all digits so far are zeros, continue to count leading zeros. */
+ if (nd == nz) {
+ while (c == '0')
+ c = *++s;
+ nz += s - s1;
+ }
+ while ('0' <= c && c <= '9')
+ c = *++s;
+ nd += s - s1;
+ }
+
+ if (!nd) {
+ /* No digits in the significand, so we've got an invalid numeric
+ string and a parse failure. */
+ rv = 1;
+ *exp = 0; /* Silence a gcc 'may be used uninitialized' warning. */
+ goto exit;
+ }
+
+ /* We've got at least one digit, so the string up to this point is a
+ valid numeric string and the parse is successful. */
+ rv = 0;
+ s00 = s;
+
+ /* Adjust s0, nd, nd0 for leading zeros. */
+ s0 += nz;
+ nd -= nz;
+ nd0 -= nz;
+
+ /* The ith digit of the significand can now be retrieved as s0[i < nd0 ? i
+ : i + 1]. Discard any trailing zeros. */
+ while (nd > 0 && s0[nd - 1 < nd0 ? nd - 1 : nd] == '0')
+ nd--;
+
+ /* Parse exponent. */
+ esign = 0;
+ abs_exp = 0U;
+ if (c == 'e' || c == 'E') {
+ c = *++s;
+
+ /* Exponent sign. */
+ esign = 0;
+ switch (c) {
+ case '-':
+ esign = 1;
+ /* no break */
+ case '+':
+ c = *++s;
+ }
+
+ /* Get absolute value abs_exp for the exponent; cap at SIZE_T_MAX. */
+ s1 = s;
+ while ('0' <= c && c <= '9') {
+ dig = (unsigned int)(c - '0');
+ /* Safe version of: if (10U * abs_exp + dig > SIZE_T_MAX) {...} */
+ if (abs_exp >= SIZE_T_MAX / 10U && (abs_exp > SIZE_T_MAX / 10U ||
+ dig > SIZE_T_MAX % 10U)) {
+ abs_exp = SIZE_T_MAX;
+ break;
+ }
+ abs_exp = 10U * abs_exp + dig;
+ c = *++s;
+ }
+ /* A valid exponent must have at least one digit. */
+ if (s != s1)
+ s00 = s;
+ }
+
+ /* Find e = nd0 + (-1)**esign * abs_exp, avoiding underflow/overflow and
+ clipping the result to the range [PY_SSIZE_T_MIN, PY_SSIZE_T_MAX].
+ Note that if the 'true' absolute exponent (i.e., without the SIZE_T_MAX
+ cap) is greater than SIZE_T_MAX then either e >= PY_SSIZE_T_MAX or e <=
+ PY_SSIZE_T_MIN, so we still end up doing the right thing here. */
+ if (esign) {
+ if (nd0 - (size_t)PY_SSIZE_T_MIN < abs_exp)
+ e = PY_SSIZE_T_MIN;
+ else
+ e = nd0 - abs_exp;
+ }
+ else {
+ if ((size_t)PY_SSIZE_T_MAX - nd0 < abs_exp)
+ e = PY_SSIZE_T_MAX;
+ else
+ e = nd0 + abs_exp;
+ }
+
+ /* Clip further to [INT_MIN, INT_MAX]. */
+ if (e < INT_MIN)
+ *exp = INT_MIN;
+ else if (e > INT_MAX)
+ *exp = INT_MAX;
+ else
+ *exp = e;
+
+ exit:
+ if (se)
+ *se = (char *)s00;
+ *pnd = nd;
+ *pnd0 = nd0;
+ *ps0 = (char *)s0;
+ return rv;
+}
+
/* The bigcomp function handles some hard cases for strtod, for inputs
with more than STRTOD_DIGLIM digits. It's called once an initial
estimate for the double corresponding to the input string has
@@ -1345,15 +1554,8 @@
bc is a struct containing information gathered during the parsing and
estimation steps of _Py_dg_strtod. Description of fields follows:
- bc->e0 gives the exponent of the input value, such that dv = (integer
- given by the bd->nd digits of s0) * 10**e0
-
- bc->nd gives the total number of significant digits of s0. It will
- be at least 1.
-
- bc->nd0 gives the number of significant digits of s0 before the
- decimal separator. If there's no decimal separator, bc->nd0 ==
- bc->nd.
+ bc->e0, bc->nd and bc->nd0 correspond to the values returned by
+ parse_numeric_string.
bc->scale is the value used to scale rv to avoid doing arithmetic with
subnormal values. It's either 0 or 2*P (=106).
@@ -1368,7 +1570,8 @@
bigcomp(U *rv, const char *s0, BCinfo *bc)
{
Bigint *b, *d;
- int b2, d2, dd, i, nd, nd0, odd, p2, p5;
+ int b2, d2, dd, i, odd, p2, p5;
+ Py_ssize_t nd, nd0;
nd = bc->nd;
nd0 = bc->nd0;
@@ -1443,7 +1646,7 @@
/* b/d >= 1 */
dd = -1;
else {
- i = 0;
+ Py_ssize_t i = 0;
for(;;) {
b = multadd(b, 10, 0);
if (b == NULL) {
@@ -1477,165 +1680,31 @@
double
_Py_dg_strtod(const char *s00, char **se)
{
- int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error;
- int esign, i, j, k, lz, nd, nd0, odd, sign;
- const char *s, *s0, *s1;
+ int bb2, bb5, bbe, bd2, bd5, bs2, dsign, e, e1, error, i, j, k, odd, sign;
+ Py_ssize_t nd, nd0, pos;
+ char *s0;
double aadj, aadj1;
U aadj2, adj, rv, rv0;
- ULong y, z, abs_exp;
+ ULong y, z;
Long L;
BCinfo bc;
Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
dval(&rv) = 0.;
- /* Start parsing. */
- c = *(s = s00);
-
- /* Parse optional sign, if present. */
- sign = 0;
- switch (c) {
- case '-':
- sign = 1;
- /* no break */
- case '+':
- c = *++s;
- }
-
- /* Skip leading zeros: lz is true iff there were leading zeros. */
- s1 = s;
- while (c == '0')
- c = *++s;
- lz = s != s1;
-
- /* Point s0 at the first nonzero digit (if any). nd0 will be the position
- of the point relative to s0. nd will be the total number of digits
- ignoring leading zeros. */
- s0 = s1 = s;
- while ('0' <= c && c <= '9')
- c = *++s;
- nd0 = nd = s - s1;
-
- /* Parse decimal point and following digits. */
- if (c == '.') {
- c = *++s;
- if (!nd) {
- s1 = s;
- while (c == '0')
- c = *++s;
- lz = lz || s != s1;
- nd0 -= s - s1;
- s0 = s;
- }
- s1 = s;
- while ('0' <= c && c <= '9')
- c = *++s;
- nd += s - s1;
- }
-
- /* Now lz is true if and only if there were leading zero digits, and nd
- gives the total number of digits ignoring leading zeros. A valid input
- must have at least one digit. */
- if (!nd && !lz) {
- if (se)
- *se = (char *)s00;
+ error = parse_numeric_string(s00, se, &s0, &nd, &nd0, &e, &sign);
+ if (error)
goto parse_error;
- }
-
- /* Parse exponent. */
- e = 0;
- if (c == 'e' || c == 'E') {
- s00 = s;
- c = *++s;
- /* Exponent sign. */
- esign = 0;
- switch (c) {
- case '-':
- esign = 1;
- /* no break */
- case '+':
- c = *++s;
- }
-
- /* Skip zeros. lz is true iff there are leading zeros. */
- s1 = s;
- while (c == '0')
- c = *++s;
- lz = s != s1;
-
- /* Get absolute value of the exponent. */
- s1 = s;
- abs_exp = 0;
- while ('0' <= c && c <= '9') {
- abs_exp = 10*abs_exp + (c - '0');
- c = *++s;
- }
-
- /* abs_exp will be correct modulo 2**32. But 10**9 < 2**32, so if
- there are at most 9 significant exponent digits then overflow is
- impossible. */
- if (s - s1 > 9 || abs_exp > MAX_ABS_EXP)
- e = (int)MAX_ABS_EXP;
- else
- e = (int)abs_exp;
- if (esign)
- e = -e;
-
- /* A valid exponent must have at least one digit. */
- if (s == s1 && !lz)
- s = s00;
- }
-
- /* Adjust exponent to take into account position of the point. */
- e += nd0;
- if (nd0 <= 0)
- nd0 = nd;
-
- /* Finished parsing. Set se to indicate how far we parsed */
- if (se)
- *se = (char *)s;
-
- /* If all digits were zero, exit with return value +-0.0. Otherwise,
- strip trailing zeros: scan back until we hit a nonzero digit. */
+ /* If all digits were zero, exit with return value +-0.0. */
if (!nd)
goto ret;
- for (i = nd; i > 0; ) {
- --i;
- if (s0[i < nd0 ? i : i+1] != '0') {
- ++i;
- break;
- }
- }
- nd = i;
- if (nd0 > nd)
- nd0 = nd;
- /* Summary of parsing results. After parsing, and dealing with zero
- * inputs, we have values s0, nd0, nd, e, sign, where:
- *
- * - s0 points to the first significant digit of the input string
- *
- * - nd is the total number of significant digits (here, and
- * below, 'significant digits' means the set of digits of the
- * significand of the input that remain after ignoring leading
- * and trailing zeros).
- *
- * - nd0 indicates the position of the decimal point, if present; it
- * satisfies 1 <= nd0 <= nd. The nd significant digits are in
- * s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice
- * notation. (If nd0 < nd, then s0[nd0] contains a '.' character; if
- * nd0 == nd, then s0[nd0] could be any non-digit character.)
- *
- * - e is the adjusted exponent: the absolute value of the number
- * represented by the original input string is n * 10**(e - nd), where
- * n is the integer represented by the concatenation of
- * s0[0:nd0] and s0[nd0+1:nd+1]
- *
- * - sign gives the sign of the input: 1 for negative, 0 for positive
- *
- * - the first and last significant digits are nonzero
- */
+ /* Overflow and underflow. */
+ if (e > Big_10_exp)
+ goto ovfl;
+ else if (e <= Tiny_10_exp)
+ goto undfl;
/* put first DBL_DIG+1 digits into integer y and z.
*
@@ -1649,11 +1718,11 @@
bc.e0 = e;
y = z = 0;
- for (i = 0; i < nd; i++) {
- if (i < 9)
- y = 10*y + s0[i < nd0 ? i : i+1] - '0';
- else if (i < DBL_DIG+1)
- z = 10*z + s0[i < nd0 ? i : i+1] - '0';
+ for (pos = 0; pos < nd; pos++) {
+ if (pos < 9)
+ y = 10*y + s0[pos < nd0 ? pos : pos+1] - '0';
+ else if (pos < DBL_DIG+1)
+ z = 10*z + s0[pos < nd0 ? pos : pos+1] - '0';
else
break;
}
@@ -1775,27 +1844,10 @@
/* in IEEE arithmetic. */
/* Truncate input to 18 significant digits, then discard any trailing
- zeros on the result by updating nd, nd0, e and y suitably. (There's
- no need to update z; it's not reused beyond this point.) */
- for (i = 18; i > 0; ) {
- /* scan back until we hit a nonzero digit. significant digit 'i'
- is s0[i] if i < nd0, s0[i+1] if i >= nd0. */
- --i;
- if (s0[i < nd0 ? i : i+1] != '0') {
- ++i;
- break;
- }
- }
- nd = i;
- if (nd0 > nd)
- nd0 = nd;
- if (nd < 9) { /* must recompute y */
- y = 0;
- for(i = 0; i < nd0; ++i)
- y = 10*y + s0[i] - '0';
- for(; i < nd; ++i)
- y = 10*y + s0[i+1] - '0';
- }
+ zeros on the result. */
+ nd = 18;
+ while (nd > 0 && s0[nd - 1 < nd0 ? nd - 1 : nd] == '0')
+ nd--;
}
bd0 = s2b(s0, nd0, nd);
if (bd0 == NULL)
More information about the Python-checkins
mailing list